diff --git a/.gitignore b/.gitignore
index ad062f4..fd91e2c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -168,3 +168,6 @@ temp/
Network Trash Folder
Temporary Items
.apdisk
+
+*.fiber.gz
+.vscode
diff --git a/Dockerfile b/Dockerfile
index 85d5774..2f4529c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM golang:alpine AS builder
+FROM golang:1.16-alpine AS builder
RUN apk update && apk add --no-cache git && apk add -U --no-cache ca-certificates
WORKDIR /app/
ADD go.mod go.sum ./
diff --git a/go.mod b/go.mod
index 5b3de66..7712d9f 100644
--- a/go.mod
+++ b/go.mod
@@ -1,8 +1,9 @@
module feed-fetcher
-go 1.15
+go 1.16
require (
+ github.com/PuerkitoBio/goquery v1.5.1
github.com/gofiber/fiber/v2 v2.6.0
github.com/mmcdole/gofeed v1.1.0
)
diff --git a/index.html b/index.html
new file mode 100644
index 0000000..91597f8
--- /dev/null
+++ b/index.html
@@ -0,0 +1,124 @@
+
+
+
+ feed-fetcher
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ feed-fetcher
+
+
+
Returns the RSS feed associated with the given URL
+
+
+ Enter a URL and press "Fetch Feed" to see all feeds (if any) associated with that page.
+
+
+
+
+
+
+
+
+
+
+ Alternatively, you can navigate to https://feed-fetcher.cluster.fun/?url=YOUR_URL_HERE and your browser will redirect to the associated feed URL if found.
+
+
+
Calling as an API:
+ If you set the
Content-Type request header to
application/json, the response will be returned as a JSON array of all found feed URLs. If no feeds are found, an empty array will be returned and the response status code will be
404. If multiple feeds are found, all will be returned in the array with a response status code of
300.
+
+✨ curl -H "Content-Type: application/json" https://feed-fetcher.cluster.fun/\?url\=https://marcusnoble.co.uk
+HTTP/1.1 200 OK
+Date: Sun, 21 Mar 2021 07:24:37 GMT
+Content-Type: application/json
+Content-Length: 38
+
+[
+ "https://marcusnoble.co.uk/feed.xml"
+]
+
+
+
+
+
+
+
+
+
+
+
diff --git a/main.go b/main.go
index 9bb5c01..d3fd5cc 100644
--- a/main.go
+++ b/main.go
@@ -7,13 +7,18 @@ import (
"os"
"strings"
+ "embed"
+
"github.com/PuerkitoBio/goquery"
"github.com/gofiber/fiber/v2"
"github.com/mmcdole/gofeed"
)
+// content is an embedded file system holding index.html, the landing page
+// the root handler sends when no "url" query parameter is supplied.
+//go:embed index.html
+
+var content embed.FS
+
func main() {
- fp := gofeed.NewParser()
port, ok := os.LookupEnv("PORT")
if !ok {
port = "8080"
@@ -24,56 +29,32 @@ func main() {
app.Get("/", func(c *fiber.Ctx) error {
feedUrl := c.Query("url")
if feedUrl == "" {
- fmt.Println("No URL provided")
- return c.SendStatus(fiber.StatusBadRequest)
+ c.Type("html", "UTF8")
+ body, _ := content.ReadFile("index.html")
+ return c.Send(body)
}
- _, err := fp.ParseURL(feedUrl)
- if err != nil && err == gofeed.ErrFeedTypeNotDetected {
- res, err := http.Get(feedUrl)
- if err != nil {
- fmt.Println("Failed to fetch URL")
- return c.SendStatus(fiber.StatusInternalServerError)
+ feeds, statusCode := getFeeds(feedUrl)
+
+ if c.Is("json") {
+ if statusCode >= 400 || statusCode == 300 {
+ c.Status(statusCode)
}
- defer res.Body.Close()
- if res.StatusCode >= 400 {
- fmt.Println("Provided URL returned an error status code")
- return c.SendStatus(res.StatusCode)
+ return c.JSON(feeds)
+ } else {
+ c.Status(statusCode)
+ c.Location(feeds[0])
+
+ if len(feeds) > 1 {
+ responseBody := "Multiple Choices\n\n"
+ for _, feed := range feeds {
+ responseBody += feed + "\n"
+ }
+ return c.SendString(responseBody)
}
- doc, err := goquery.NewDocumentFromReader(res.Body)
- if err != nil {
- fmt.Println("Failed to parse response body")
- return c.SendStatus(fiber.StatusInternalServerError)
- }
-
- matches := doc.Find(`[rel="alternate"][type="application/rss+xml"]`)
- if matches.Length() == 0 {
- fmt.Println("No RSS feeds found on page")
- return c.SendStatus(fiber.StatusNotFound)
- }
-
- foundUrl, ok := matches.First().Attr("href")
- if !ok {
- fmt.Println("href attribute missing from tag")
- return c.SendStatus(fiber.StatusNotFound)
- }
- c.Set("Location", absoluteUrl(feedUrl, foundUrl))
- if matches.Length() > 1 {
- fmt.Println("Multiple feeds found on page")
- return c.SendStatus(fiber.StatusMultipleChoices)
- } else {
- fmt.Println("Feed found on page")
- return c.SendStatus(fiber.StatusTemporaryRedirect)
- }
- } else if err != nil {
- fmt.Println("Failed while attempting to parse feed")
- return c.SendStatus(fiber.StatusInternalServerError)
+ return c.Send(nil)
}
-
- fmt.Println("URL provided is already a feed")
- c.Set("Location", feedUrl)
- return c.SendStatus(fiber.StatusMovedPermanently)
})
fmt.Println(app.Listen(fmt.Sprintf(":%s", port)))
@@ -87,3 +68,54 @@ func absoluteUrl(requestUrl, foundUrl string) string {
return foundUrl
}
+
+// getFeeds resolves requestURL to the feed URLs associated with it and the
+// HTTP status code the caller should reply with.
+//
+// If requestURL already parses as a feed it is returned on its own with 301.
+// Otherwise the page is fetched and its rel="alternate" RSS links are
+// returned as absolute URLs with 307 (exactly one) or 300 (several). On
+// failure the slice is empty (never nil) and the status is the upstream
+// status when it is >= 400, 404 when no usable link exists, or 500.
+func getFeeds(requestURL string) ([]string, int) {
+	feeds := []string{}
+
+	_, err := gofeed.NewParser().ParseURL(requestURL)
+	if err == nil {
+		// 301 rather than 200: the non-JSON handler path pairs this status
+		// with a Location header, which clients only follow on a 3xx.
+		return []string{requestURL}, fiber.StatusMovedPermanently
+	}
+	if err != gofeed.ErrFeedTypeNotDetected {
+		fmt.Println("Failed while attempting to parse feed")
+		return feeds, fiber.StatusInternalServerError
+	}
+
+	// The URL is not a feed itself: fetch the page and look for the feeds
+	// it advertises.
+	res, err := http.Get(requestURL)
+	if err != nil {
+		fmt.Println("Failed to fetch URL")
+		return feeds, fiber.StatusInternalServerError
+	}
+	defer res.Body.Close()
+
+	if res.StatusCode >= 400 {
+		fmt.Println("Provided URL returned an error status code")
+		return feeds, res.StatusCode
+	}
+
+	doc, err := goquery.NewDocumentFromReader(res.Body)
+	if err != nil {
+		fmt.Println("Failed to parse response body")
+		return feeds, fiber.StatusInternalServerError
+	}
+
+	doc.Find(`[rel="alternate"][type="application/rss+xml"]`).Each(func(_ int, s *goquery.Selection) {
+		// A link tag without a usable href cannot yield a feed URL; skip it
+		// instead of resolving an empty reference.
+		if href, ok := s.Attr("href"); ok && href != "" {
+			feeds = append(feeds, absoluteUrl(requestURL, href))
+		}
+	})
+
+	// Decide on the number of usable links, not the number of matched tags.
+	switch len(feeds) {
+	case 0:
+		fmt.Println("No RSS feeds found on page")
+		return feeds, fiber.StatusNotFound
+	case 1:
+		fmt.Println("Feed found on page")
+		return feeds, fiber.StatusTemporaryRedirect
+	default:
+		fmt.Println("Multiple feeds found on page")
+		return feeds, fiber.StatusMultipleChoices
+	}
+}