From 032351e8f100c45f0d8109361b94f8b2c3176b9a Mon Sep 17 00:00:00 2001 From: Marcus Noble Date: Sun, 21 Mar 2021 08:42:56 +0000 Subject: [PATCH] Added webpage --- .gitignore | 3 ++ Dockerfile | 2 +- go.mod | 3 +- index.html | 124 +++++++++++++++++++++++++++++++++++++++++++++++++++++ main.go | 122 +++++++++++++++++++++++++++++++++------------------- 5 files changed, 207 insertions(+), 47 deletions(-) create mode 100644 index.html diff --git a/.gitignore b/.gitignore index ad062f4..fd91e2c 100644 --- a/.gitignore +++ b/.gitignore @@ -168,3 +168,6 @@ temp/ Network Trash Folder Temporary Items .apdisk + +*.fiber.gz +.vscode diff --git a/Dockerfile b/Dockerfile index 85d5774..2f4529c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:alpine AS builder +FROM golang:1.16-alpine AS builder RUN apk update && apk add --no-cache git && apk add -U --no-cache ca-certificates WORKDIR /app/ ADD go.mod go.sum ./ diff --git a/go.mod b/go.mod index 5b3de66..7712d9f 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,9 @@ module feed-fetcher -go 1.15 +go 1.16 require ( + github.com/PuerkitoBio/goquery v1.5.1 github.com/gofiber/fiber/v2 v2.6.0 github.com/mmcdole/gofeed v1.1.0 ) diff --git a/index.html b/index.html new file mode 100644 index 0000000..91597f8 --- /dev/null +++ b/index.html @@ -0,0 +1,124 @@ + + + + feed-fetcher + + + + + + + + + + + + + + + + + + + +
+

+ feed-fetcher + +

+
Returns the RSS feed associated with the given URL
+ +

+ Enter a URL and press "Fetch Feed" to see all feeds (if any) associated with that page. 

+ +
+
+
+ + + +
+
+
+
+
+
+ +
+ +

+ Alternatively, you can navigate to https://feed-fetcher.cluster.fun/?url=YOUR_URL_HERE and your browser will be redirected to the associated feed URL, if one is found. 

+

+

Calling as an API:

+ If you set the Content-Type request header to application/json, the response will be returned as a JSON array of all feed URLs found. If no feeds are found, an empty array will be returned and the response status code will be 404. If multiple feeds are found, all of them will be returned in the array with a response status code of 300. 

+✨ curl -i -H "Content-Type: application/json" https://feed-fetcher.cluster.fun/\?url\=https://marcusnoble.co.uk
+HTTP/1.1 200 OK
+Date: Sun, 21 Mar 2021 07:24:37 GMT
+Content-Type: application/json
+Content-Length: 38
+
+[
+    "https://marcusnoble.co.uk/feed.xml"
+]
+        
+

+ +
+ Source code available on GitHub, GitLab, Bitbucket & my own Gitea server. +
+
+ +
+
+
+ +
+
+
+ + + + diff --git a/main.go b/main.go index 9bb5c01..d3fd5cc 100644 --- a/main.go +++ b/main.go @@ -7,13 +7,18 @@ import ( "os" "strings" + "embed" + "github.com/PuerkitoBio/goquery" "github.com/gofiber/fiber/v2" "github.com/mmcdole/gofeed" ) +//go:embed index.html + +var content embed.FS + func main() { - fp := gofeed.NewParser() port, ok := os.LookupEnv("PORT") if !ok { port = "8080" @@ -24,56 +29,32 @@ func main() { app.Get("/", func(c *fiber.Ctx) error { feedUrl := c.Query("url") if feedUrl == "" { - fmt.Println("No URL provided") - return c.SendStatus(fiber.StatusBadRequest) + c.Type("html", "UTF8") + body, _ := content.ReadFile("index.html") + return c.Send(body) } - _, err := fp.ParseURL(feedUrl) - if err != nil && err == gofeed.ErrFeedTypeNotDetected { - res, err := http.Get(feedUrl) - if err != nil { - fmt.Println("Failed to fetch URL") - return c.SendStatus(fiber.StatusInternalServerError) + feeds, statusCode := getFeeds(feedUrl) + + if c.Is("json") { + if statusCode >= 400 || statusCode == 300 { + c.Status(statusCode) } - defer res.Body.Close() - if res.StatusCode >= 400 { - fmt.Println("Provided URL returned an error status code") - return c.SendStatus(res.StatusCode) + return c.JSON(feeds) + } else { + c.Status(statusCode) + c.Location(feeds[0]) + + if len(feeds) > 1 { + responseBody := "Multiple Choices\n\n" + for _, feed := range feeds { + responseBody += feed + "\n" + } + return c.SendString(responseBody) } - doc, err := goquery.NewDocumentFromReader(res.Body) - if err != nil { - fmt.Println("Failed to parse response body") - return c.SendStatus(fiber.StatusInternalServerError) - } - - matches := doc.Find(`[rel="alternate"][type="application/rss+xml"]`) - if matches.Length() == 0 { - fmt.Println("No RSS feeds found on page") - return c.SendStatus(fiber.StatusNotFound) - } - - foundUrl, ok := matches.First().Attr("href") - if !ok { - fmt.Println("href attribute missing from tag") - return c.SendStatus(fiber.StatusNotFound) - } - c.Set("Location", 
absoluteUrl(feedUrl, foundUrl)) - if matches.Length() > 1 { - fmt.Println("Multiple feeds found on page") - return c.SendStatus(fiber.StatusMultipleChoices) - } else { - fmt.Println("Feed found on page") - return c.SendStatus(fiber.StatusTemporaryRedirect) - } - } else if err != nil { - fmt.Println("Failed while attempting to parse feed") - return c.SendStatus(fiber.StatusInternalServerError) + return c.Send(nil) } - - fmt.Println("URL provided is already a feed") - c.Set("Location", feedUrl) - return c.SendStatus(fiber.StatusMovedPermanently) }) fmt.Println(app.Listen(fmt.Sprintf(":%s", port))) @@ -87,3 +68,54 @@ func absoluteUrl(requestUrl, foundUrl string) string { return foundUrl } + +func getFeeds(requestURL string) ([]string, int) { + feeds := []string{} + + fp := gofeed.NewParser() + _, err := fp.ParseURL(requestURL) + if err == nil { + feeds = []string{requestURL} + } else if err != nil && err == gofeed.ErrFeedTypeNotDetected { + res, err := http.Get(requestURL) + if err != nil { + fmt.Println("Failed to fetch URL") + return feeds, fiber.StatusInternalServerError + } + defer res.Body.Close() + + if res.StatusCode >= 400 { + fmt.Println("Provided URL returned an error status code") + return feeds, res.StatusCode + } + + doc, err := goquery.NewDocumentFromReader(res.Body) + if err != nil { + fmt.Println("Failed to parse response body") + return feeds, fiber.StatusInternalServerError + } + + matches := doc.Find(`[rel="alternate"][type="application/rss+xml"]`) + if matches.Length() == 0 { + fmt.Println("No RSS feeds found on page") + return feeds, fiber.StatusNotFound + } + + matches.Each(func(i int, s *goquery.Selection) { + feeds = append(feeds, absoluteUrl(requestURL, s.AttrOr("href", ""))) + }) + + if matches.Length() > 1 { + fmt.Println("Multiple feeds found on page") + return feeds, fiber.StatusMultipleChoices + } else { + fmt.Println("Feed found on page") + return feeds, fiber.StatusTemporaryRedirect + } + } else if err != nil { + fmt.Println("Failed 
while attempting to parse feed") + return feeds, fiber.StatusInternalServerError + } + + return feeds, 200 +}