Improved HTTP client with rate limiting and retries

Signed-off-by: Marcus Noble <github@marcusnoble.co.uk>
This commit is contained in:
2025-05-06 13:37:43 +01:00
parent f5041fcb91
commit 4b9fb0afe6
4 changed files with 83 additions and 33 deletions

1
go.mod
View File

@@ -5,4 +5,5 @@ go 1.24.0
require ( require (
github.com/joho/godotenv v1.5.1 github.com/joho/godotenv v1.5.1
golang.org/x/net v0.39.0 golang.org/x/net v0.39.0
golang.org/x/time v0.11.0
) )

2
go.sum
View File

@@ -2,3 +2,5 @@ github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY= golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY=
golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E= golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E=
golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0=
golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=

View File

@@ -4,7 +4,6 @@ import (
"fmt" "fmt"
"iter" "iter"
"maps" "maps"
"net/http"
"os" "os"
"slices" "slices"
"sort" "sort"
@@ -16,8 +15,7 @@ import (
) )
var ( var (
COOKIE string c *HTTPClient
c http.Client
) )
type Book struct { type Book struct {
@@ -31,12 +29,12 @@ type Book struct {
func init() { func init() {
godotenv.Load(os.Getenv("DOTENV_DIR") + ".env") godotenv.Load(os.Getenv("DOTENV_DIR") + ".env")
COOKIE = os.Getenv("COOKIE") cookie := os.Getenv("COOKIE")
if COOKIE == "" { if cookie == "" {
panic("COOKIE is not set") panic("COOKIE is not set")
} }
c = http.Client{} c = New(cookie)
} }
func GetLatestBooks() (map[string]*Book, error) { func GetLatestBooks() (map[string]*Book, error) {
@@ -46,21 +44,12 @@ func GetLatestBooks() (map[string]*Book, error) {
for { for {
page++ page++
req, err := http.NewRequest("GET", fmt.Sprintf("https://app.thestorygraph.com/to-read/averagemarcus?page=%d", page), nil) resp, err := c.Get(fmt.Sprintf("https://app.thestorygraph.com/to-read/averagemarcus?page=%d", page))
if err != nil {
return nil, err
}
req.Header.Set("Cookie", COOKIE)
resp, err := c.Do(req)
if err != nil { if err != nil {
fmt.Println("Error making request:", err)
return nil, err return nil, err
} }
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode != 200 {
break
}
doc, err := html.Parse(resp.Body) doc, err := html.Parse(resp.Body)
if err != nil { if err != nil {
return nil, err return nil, err
@@ -116,21 +105,12 @@ func GetLatestBooks() (map[string]*Book, error) {
} }
func getRating(bookID string) string { func getRating(bookID string) string {
req, err := http.NewRequest("GET", fmt.Sprintf("https://app.thestorygraph.com/books/%s/community_reviews", bookID), nil) resp, err := c.Get(fmt.Sprintf("https://app.thestorygraph.com/books/%s/community_reviews", bookID))
if err != nil { if err != nil {
panic(err) fmt.Println("Error fetching book rating:", resp.StatusCode)
} return "0.0"
req.Header.Set("Cookie", COOKIE)
resp, err := c.Do(req)
if err != nil {
panic(err)
} }
defer resp.Body.Close() defer resp.Body.Close()
if resp.StatusCode != 200 {
fmt.Println("Error fetching book rating:", resp.StatusCode)
return ""
}
doc, err := html.Parse(resp.Body) doc, err := html.Parse(resp.Body)
if err != nil { if err != nil {
@@ -145,14 +125,13 @@ func getRating(bookID string) string {
return strings.TrimSpace(t.Data) return strings.TrimSpace(t.Data)
} }
} }
} }
} }
} }
} }
fmt.Println("Error fetching book rating: no rating found") fmt.Println("Error fetching book rating: no rating found for book", bookID)
return "0" return "0.0"
} }
func getName(decs iter.Seq[*html.Node]) string { func getName(decs iter.Seq[*html.Node]) string {
@@ -189,7 +168,7 @@ func getTags(decs iter.Seq[*html.Node]) []string {
for _, a := range n.Attr { for _, a := range n.Attr {
if a.Key == "class" && strings.Contains(a.Val, "book-pane-tag-section") { if a.Key == "class" && strings.Contains(a.Val, "book-pane-tag-section") {
for t := range n.Descendants() { for t := range n.Descendants() {
if t.Type == html.ElementNode && t.DataAtom == atom.Span { if t.Type == html.ElementNode && (t.DataAtom == atom.Span || t.DataAtom == atom.A) {
for b := range t.Descendants() { for b := range t.Descendants() {
if b.Type == html.TextNode { if b.Type == html.TextNode {
switch b.Data { switch b.Data {

68
pkg/storygraph/http.go Normal file
View File

@@ -0,0 +1,68 @@
package storygraph
import (
"context"
"fmt"
"net/http"
"time"
"golang.org/x/time/rate"
)
// HTTPClient wraps an http.Client with a session cookie, a rate limiter and a
// retry budget. Values are normally built with New.
type HTTPClient struct {
// Cookie is sent as the Cookie header on every request made by Get.
Cookie string
// client performs the underlying HTTP round trips.
client *http.Client
// rl is waited on before each request attempt in Get.
rl *rate.Limiter
// ctx is the context handed to the rate limiter when waiting.
ctx context.Context
// retries is the retry budget Get consults when the server answers
// 429 Too Many Requests.
retries int
}
// New returns an HTTPClient that sends cookie as the Cookie header on every
// request.
//
// The client is configured with:
//   - a 30 second overall timeout per request, so a stalled server cannot
//     block the caller forever (the zero-value http.Client never times out);
//   - a rate limiter that allows a burst of 15 requests and then one request
//     per second;
//   - a background context;
//   - a retry budget of 3 for rate-limited (429) responses.
func New(cookie string) *HTTPClient {
	return &HTTPClient{
		Cookie:  cookie,
		client:  &http.Client{Timeout: 30 * time.Second},
		rl:      rate.NewLimiter(rate.Every(1*time.Second), 15),
		ctx:     context.Background(),
		retries: 3,
	}
}
// Get issues a rate-limited GET request for url with the client's Cookie
// header attached.
//
// Each call makes at most h.retries attempts. An attempt answered with
// 429 Too Many Requests has its body closed and is retried after waiting on
// the rate limiter again; if every attempt is answered with 429 the call
// fails with "max retries exceeded".
//
// Any other response is returned with a nil error. This includes non-200
// statuses, which are only logged, so callers must inspect resp.StatusCode
// and are responsible for closing resp.Body. Errors from the rate limiter,
// from building the request, or from the transport are logged and returned
// together with a nil response.
func (h *HTTPClient) Get(url string) (*http.Response, error) {
	// Count attempts locally. Decrementing h.retries would permanently shrink
	// the budget shared by every later call on this client, so after a few
	// 429s in total every request would fail immediately.
	for attempt := 0; attempt < h.retries; attempt++ {
		if err := h.rl.Wait(h.ctx); err != nil {
			fmt.Println("Error waiting for rate limiter:", err)
			return nil, err
		}

		// Bind the request to the client's context so cancellation covers the
		// HTTP round trip as well as the rate-limiter wait.
		req, err := http.NewRequestWithContext(h.ctx, http.MethodGet, url, nil)
		if err != nil {
			fmt.Println("Error creating request:", err)
			return nil, err
		}
		req.Header.Set("Cookie", h.Cookie)

		resp, err := h.client.Do(req)
		if err != nil {
			fmt.Println("Error making request:", err)
			return nil, err
		}

		if resp.StatusCode == http.StatusTooManyRequests {
			fmt.Println("Rate limit exceeded, retrying...")
			// The response is discarded, so release its body before the next
			// attempt instead of leaking the connection.
			resp.Body.Close()
			continue
		}

		if resp.StatusCode != http.StatusOK {
			// NOTE(review): non-200 responses are logged but still returned
			// with a nil error, exactly as before. Confirm whether callers
			// would prefer an error here.
			fmt.Println("Error fetching page:", resp.StatusCode)
		}
		return resp, nil
	}

	fmt.Println("Max retries exceeded")
	return nil, fmt.Errorf("max retries exceeded")
}