237 lines
5.0 KiB
Go
237 lines
5.0 KiB
Go
package storygraph
|
|
|
|
import (
|
|
"fmt"
|
|
"iter"
|
|
"maps"
|
|
"os"
|
|
"slices"
|
|
"sort"
|
|
"strings"
|
|
|
|
"github.com/joho/godotenv"
|
|
"golang.org/x/net/html"
|
|
"golang.org/x/net/html/atom"
|
|
)
|
|
|
|
var (
|
|
c *HTTPClient
|
|
)
|
|
|
|
// Book describes a single entry scraped from the StoryGraph to-read list.
type Book struct {
	// ID is the value of the entry's data-book-id attribute.
	ID string
	// Name is the alt text of the first <img> in the entry that has one.
	Name string
	// Link is the URL of the book's page, built from ID.
	Link string
	// Image is the src of the first <img> in the entry that has one.
	Image string
	// Rating is the average rating as text, or "0.0" when none could be read.
	Rating string
	// Tags holds the category names assigned to the book.
	Tags []string
}
|
|
|
|
func init() {
|
|
godotenv.Load(os.Getenv("DOTENV_DIR") + ".env")
|
|
cookie := os.Getenv("COOKIE")
|
|
if cookie == "" {
|
|
panic("COOKIE is not set")
|
|
}
|
|
|
|
c = New(cookie)
|
|
}
|
|
|
|
func GetLatestBooks() (map[string]*Book, error) {
|
|
fmt.Println("Fetching latest book recommendations...")
|
|
links := []Book{}
|
|
|
|
page := 0
|
|
for {
|
|
page++
|
|
fmt.Println("Fetching page", page)
|
|
|
|
resp, err := c.Get(fmt.Sprintf("https://app.thestorygraph.com/to-read/averagemarcus?page=%d", page))
|
|
if err != nil {
|
|
fmt.Println("Error making request:", err)
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
doc, err := html.Parse(resp.Body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
bookFound := false
|
|
for n := range doc.Descendants() {
|
|
if n.Type == html.ElementNode && n.DataAtom == atom.Div {
|
|
for _, a := range n.Attr {
|
|
if a.Key == "data-book-id" {
|
|
bookFound = true
|
|
|
|
bookID := a.Val
|
|
name := getName(n.Descendants())
|
|
link := fmt.Sprintf("https://app.thestorygraph.com/books/%s", bookID)
|
|
tags := getTags(n.Descendants())
|
|
image := getImage(n.Descendants())
|
|
|
|
if !bookContains(links, bookID) {
|
|
links = append(links, Book{
|
|
ID: bookID,
|
|
Name: name,
|
|
Link: link,
|
|
Image: image,
|
|
Rating: getRating(bookID),
|
|
Tags: tags,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if !bookFound {
|
|
break
|
|
}
|
|
}
|
|
|
|
sort.Slice(links, func(i, j int) bool {
|
|
return links[i].Rating > links[j].Rating
|
|
})
|
|
|
|
return map[string]*Book{
|
|
"Fiction": nextByTag(links, "Fiction"),
|
|
"Non-Fiction": nextByTag(links, "Non-Fiction"),
|
|
"Health": nextByTag(links, "Health"),
|
|
"Art": nextByTag(links, "Art"),
|
|
"Business": nextByTag(links, "Business"),
|
|
"Technology": nextByTag(links, "Technology"),
|
|
"Sci-Fi": nextByTag(links, "Sci-Fi"),
|
|
"Fantasy": nextByTag(links, "Fantasy"),
|
|
"Comics": nextByTag(links, "Comics"),
|
|
}, nil
|
|
}
|
|
|
|
func getRating(bookID string) string {
|
|
resp, err := c.Get(fmt.Sprintf("https://app.thestorygraph.com/books/%s/community_reviews", bookID))
|
|
if err != nil {
|
|
fmt.Println("Error fetching book rating:", err)
|
|
return "0.0"
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
doc, err := html.Parse(resp.Body)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
for n := range doc.Descendants() {
|
|
if n.Type == html.ElementNode && n.DataAtom == atom.Span {
|
|
for _, a := range n.Attr {
|
|
if a.Key == "class" && strings.Contains(a.Val, "average-star-rating") {
|
|
for t := range n.Descendants() {
|
|
if t.Type == html.TextNode {
|
|
return strings.TrimSpace(t.Data)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fmt.Println("Error fetching book rating: no rating found for book", bookID)
|
|
return "0.0"
|
|
}
|
|
|
|
func getName(decs iter.Seq[*html.Node]) string {
|
|
for n := range decs {
|
|
if n.Type == html.ElementNode && n.DataAtom == atom.Img {
|
|
for _, a := range n.Attr {
|
|
if a.Key == "alt" {
|
|
return a.Val
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func getImage(decs iter.Seq[*html.Node]) string {
|
|
for n := range decs {
|
|
if n.Type == html.ElementNode && n.DataAtom == atom.Img {
|
|
for _, a := range n.Attr {
|
|
if a.Key == "src" {
|
|
return a.Val
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func getTags(decs iter.Seq[*html.Node]) []string {
|
|
tags := map[string]bool{}
|
|
|
|
for n := range decs {
|
|
if n.Type == html.ElementNode && n.DataAtom == atom.Div {
|
|
for _, a := range n.Attr {
|
|
if a.Key == "class" && strings.Contains(a.Val, "book-pane-tag-section") {
|
|
for t := range n.Descendants() {
|
|
if t.Type == html.ElementNode && (t.DataAtom == atom.Span || t.DataAtom == atom.A) {
|
|
for b := range t.Descendants() {
|
|
if b.Type == html.TextNode {
|
|
switch b.Data {
|
|
case "fiction":
|
|
tags["Fiction"] = true
|
|
case "nonfiction":
|
|
tags["Non-Fiction"] = true
|
|
case "psychology":
|
|
fallthrough
|
|
case "self help":
|
|
fallthrough
|
|
case "health":
|
|
tags["Health"] = true
|
|
case "art":
|
|
tags["Art"] = true
|
|
case "business":
|
|
tags["Business"] = true
|
|
case "technology":
|
|
fallthrough
|
|
case "computer science":
|
|
tags["Technology"] = true
|
|
case "science fiction":
|
|
tags["Sci-Fi"] = true
|
|
case "fantasy":
|
|
tags["Fantasy"] = true
|
|
case "comics":
|
|
tags["Comics"] = true
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Only include comics in the "comics" category
|
|
if tags["Comics"] {
|
|
tags = map[string]bool{"Comics": true}
|
|
}
|
|
|
|
return slices.Collect(maps.Keys(tags))
|
|
}
|
|
|
|
func bookContains(links []Book, bookID string) bool {
|
|
for _, b := range links {
|
|
if b.ID == bookID {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func nextByTag(links []Book, tag string) *Book {
|
|
for _, b := range links {
|
|
if slices.Contains(b.Tags, tag) {
|
|
return &b
|
|
}
|
|
}
|
|
return nil
|
|
}
|