gopherss/internal/feeds/refresh.go

127 lines
2.8 KiB
Go
Raw Normal View History

2020-10-17 13:30:30 +00:00
package feeds
import (
"encoding/base64"
"fmt"
2021-03-17 08:02:14 +00:00
"net/http"
"net/url"
2020-10-17 13:30:30 +00:00
"strings"
"time"
2021-03-17 08:02:14 +00:00
"github.com/PuerkitoBio/goquery"
2020-10-17 13:30:30 +00:00
"github.com/mmcdole/gofeed"
"github.com/spf13/viper"
)
// Package-level state shared by the refresh functions in this file.
var (
	// fp is the gofeed parser RefreshFeed uses to download and parse feeds.
	fp = gofeed.NewParser()

	// feedStore is the store feeds are listed from, saved to and reaped in.
	feedStore = &FeedStore{}
)
func Refresh() error {
2020-10-17 19:55:05 +00:00
interval := viper.GetInt("REFRESH_TIMEOUT")
2020-10-17 13:30:30 +00:00
for {
fmt.Println("Refreshing feeds...")
for _, feed := range *feedStore.GetFeeds() {
go RefreshFeed(feed.FeedURL)
}
2020-11-08 19:51:05 +00:00
fmt.Println("Reaping old items...")
feedStore.DeleteOldReadItems()
2020-10-17 13:30:30 +00:00
fmt.Printf("Going to sleep for %d minutes\n", interval)
time.Sleep(time.Duration(interval) * time.Minute)
}
}
2021-03-17 08:02:14 +00:00
func RefreshFeed(feedUrl string) Feed {
fmt.Printf("Refreshing %s\n", feedUrl)
2020-10-17 18:02:52 +00:00
var feed Feed
2021-03-17 08:02:14 +00:00
f, err := fp.ParseURL(feedUrl)
if err != nil && err == gofeed.ErrFeedTypeNotDetected {
foundFeed := loadFeedFromWebpage(feedUrl)
if foundFeed != nil {
feed = *foundFeed
}
} else if err != nil {
fmt.Printf("Failed to refresh %s\n%v\n", feedUrl, err)
2020-10-17 13:30:30 +00:00
} else {
imageURL := ""
if f.Image != nil {
imageURL = f.Image.URL
}
2020-10-17 18:02:52 +00:00
feed = Feed{
2021-03-17 08:02:14 +00:00
ID: strings.ReplaceAll(base64.StdEncoding.EncodeToString([]byte(feedUrl)), "/", ""),
2020-10-17 13:30:30 +00:00
Title: f.Title,
Description: f.Description,
HomepageURL: f.Link,
2021-03-17 08:02:14 +00:00
FeedURL: feedUrl,
2020-10-17 13:30:30 +00:00
ImageURL: imageURL,
LastUpdated: f.UpdatedParsed,
Items: []Item{},
}
for _, item := range f.Items {
imageURL := ""
if f.Image != nil {
imageURL = f.Image.URL
}
2021-02-21 09:09:25 +00:00
createdTime := item.PublishedParsed
if createdTime == nil {
createdTime = item.UpdatedParsed
}
2020-10-17 13:30:30 +00:00
feed.Items = append(feed.Items, Item{
ID: strings.ReplaceAll(base64.StdEncoding.EncodeToString([]byte(item.GUID)), "/", ""),
Title: item.Title,
Description: item.Description,
Content: item.Content,
URL: item.Link,
ImageURL: imageURL,
LastUpdated: item.UpdatedParsed,
2021-02-21 09:09:25 +00:00
Created: createdTime,
2020-10-17 13:30:30 +00:00
GUID: item.GUID,
FeedID: feed.ID,
})
}
feedStore.SaveFeed(feed)
fmt.Printf("Finished refreshing '%s'\n", feed.Title)
}
2020-10-17 18:02:52 +00:00
return feed
2020-10-17 13:30:30 +00:00
}
2021-03-17 08:02:14 +00:00
// loadFeedFromWebpage fetches webpageUrl as HTML, looks for the first element
// with rel="alternate" and type="application/rss+xml", resolves its href
// against webpageUrl and refreshes the feed found there via RefreshFeed.
//
// It returns nil when the page cannot be fetched or parsed, when the server
// does not answer 200, when no feed link is present, when the link cannot be
// resolved, or when the link points back at the page itself.
func loadFeedFromWebpage(webpageUrl string) *Feed {
	// The default client has no timeout; bound the request so a stalled
	// server cannot block the calling goroutine indefinitely.
	client := &http.Client{Timeout: 30 * time.Second}

	res, err := client.Get(webpageUrl)
	if err != nil {
		fmt.Println(err)
		return nil
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		// res.Status already contains the numeric code, e.g. "404 Not Found".
		fmt.Printf("status code error: %s\n", res.Status)
		return nil
	}

	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		fmt.Println(err)
		return nil
	}

	href, ok := doc.Find(`[rel="alternate"][type="application/rss+xml"]`).First().Attr("href")
	if !ok {
		return nil
	}

	base, err := url.Parse(webpageUrl)
	if err != nil {
		fmt.Println(err)
		return nil
	}

	// Resolve the href as a URL reference so that absolute URLs, absolute
	// paths ("/feed.xml"), relative paths ("feed.xml") and protocol-relative
	// links ("//host/feed.xml") are all handled.
	resolved, err := base.Parse(strings.TrimSpace(href))
	if err != nil {
		fmt.Println(err)
		return nil
	}

	feedURL := resolved.String()
	if feedURL == webpageUrl || feedURL == base.String() {
		// RefreshFeed calls back into this function when the target is not a
		// feed, so a page that advertises itself would recurse without end.
		// Only this direct self-reference is guarded; longer cycles are not.
		return nil
	}

	feed := RefreshFeed(feedURL)
	return &feed
}