diff --git a/go.mod b/go.mod index be3c747..37527b4 100644 --- a/go.mod +++ b/go.mod @@ -5,4 +5,5 @@ go 1.24.0 require ( github.com/joho/godotenv v1.5.1 golang.org/x/net v0.39.0 + golang.org/x/time v0.11.0 ) diff --git a/go.sum b/go.sum index cd11744..eb3a874 100644 --- a/go.sum +++ b/go.sum @@ -2,3 +2,5 @@ github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY= golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E= +golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= +golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= diff --git a/pkg/storygraph/client.go b/pkg/storygraph/client.go index f1751b0..6ba3396 100644 --- a/pkg/storygraph/client.go +++ b/pkg/storygraph/client.go @@ -4,7 +4,6 @@ import ( "fmt" "iter" "maps" - "net/http" "os" "slices" "sort" @@ -16,8 +15,7 @@ import ( ) var ( - COOKIE string - c http.Client + c *HTTPClient ) type Book struct { @@ -31,12 +29,12 @@ type Book struct { func init() { godotenv.Load(os.Getenv("DOTENV_DIR") + ".env") - COOKIE = os.Getenv("COOKIE") - if COOKIE == "" { + cookie := os.Getenv("COOKIE") + if cookie == "" { panic("COOKIE is not set") } - c = http.Client{} + c = New(cookie) } func GetLatestBooks() (map[string]*Book, error) { @@ -46,21 +44,12 @@ func GetLatestBooks() (map[string]*Book, error) { for { page++ - req, err := http.NewRequest("GET", fmt.Sprintf("https://app.thestorygraph.com/to-read/averagemarcus?page=%d", page), nil) - if err != nil { - return nil, err - } - req.Header.Set("Cookie", COOKIE) - - resp, err := c.Do(req) + resp, err := c.Get(fmt.Sprintf("https://app.thestorygraph.com/to-read/averagemarcus?page=%d", page)) if err != nil { + fmt.Println("Error making request:", err) return nil, err } defer resp.Body.Close() - if resp.StatusCode != 200 { - break - } - doc, err := html.Parse(resp.Body) if err != nil { return nil, err @@ -116,21 +105,12 @@ func GetLatestBooks() (map[string]*Book, error) { } func getRating(bookID string) string { - req, err := http.NewRequest("GET", fmt.Sprintf("https://app.thestorygraph.com/books/%s/community_reviews", bookID), nil) + resp, err := c.Get(fmt.Sprintf("https://app.thestorygraph.com/books/%s/community_reviews", bookID)) if err != nil { - panic(err) - } - req.Header.Set("Cookie", COOKIE) - - resp, err := c.Do(req) - if err != nil { - panic(err) + fmt.Println("Error fetching book rating:", resp.StatusCode) + return "0.0" } defer resp.Body.Close() - if resp.StatusCode != 200 { - fmt.Println("Error fetching book rating:", resp.StatusCode) - return "" - } doc, err := html.Parse(resp.Body) if err != nil { @@ -145,14 +125,13 @@ func getRating(bookID string) string { return strings.TrimSpace(t.Data) } } - } } } } - fmt.Println("Error fetching book rating: no rating found") - return "0" + fmt.Println("Error fetching book rating: no rating found for book", bookID) + return "0.0" } func getName(decs iter.Seq[*html.Node]) string { @@ -189,7 +168,7 @@ func getTags(decs iter.Seq[*html.Node]) []string { for _, a := range n.Attr { if a.Key == "class" && strings.Contains(a.Val, "book-pane-tag-section") { for t := range n.Descendants() { - if t.Type == html.ElementNode && t.DataAtom == atom.Span { + if t.Type == html.ElementNode && (t.DataAtom == atom.Span || t.DataAtom == atom.A) { for b := range t.Descendants() { if b.Type == html.TextNode { switch b.Data { diff --git a/pkg/storygraph/http.go b/pkg/storygraph/http.go new file mode 100644 index 0000000..2c0ea07 --- /dev/null +++ b/pkg/storygraph/http.go @@ -0,0 +1,68 @@ +package storygraph + +import ( + "context" + "fmt" + "net/http" + "time" + + "golang.org/x/time/rate" +) + +type HTTPClient struct { + Cookie string + + client *http.Client + rl *rate.Limiter + ctx context.Context + retries int +} + +func New(cookie string) *HTTPClient { + return &HTTPClient{ + Cookie: cookie, + + client: &http.Client{}, + rl: rate.NewLimiter(rate.Every(1*time.Second), 15), + ctx: context.Background(), + retries: 3, + } +} + +func (h *HTTPClient) Get(url string) (*http.Response, error) { + for h.retries > 0 { + if err := h.rl.Wait(h.ctx); err != nil { + fmt.Println("Error waiting for rate limiter:", err) + return nil, err + } + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + fmt.Println("Error creating request:", err) + return nil, err + } + + req.Header.Set("Cookie", h.Cookie) + resp, err := h.client.Do(req) + if err != nil { + fmt.Println("Error making request:", err) + return nil, err + } + + if resp.StatusCode == 429 { + fmt.Println("Rate limit exceeded, retrying...") + h.retries-- + continue + } + + if resp.StatusCode != 200 { + fmt.Println("Error fetching page:", resp.StatusCode) + return resp, err + } + + return resp, nil + } + + fmt.Println("Max retries exceeded") + return nil, fmt.Errorf("max retries exceeded") +}