Issues with colly to scrape fixture data correctly

63 views Asked by At

I am currently attempting to scrape this website to get at least the first fixture using the colly package. Eventually I want it to scrape every fixture for today's date. I believe I am populating the fixture incorrectly, and I am wondering what is going wrong in my code. This is the website: https://www.goal.com/en-us/live-scores

// scraper.go

package main

import (
    "fmt"
    // Colly
    "github.com/gocolly/colly"
)

// Fixture holds the text scraped for a single match result. Every field
// is kept as the raw string extracted from the page.
type Fixture struct {
    // League is the competition name; Winner and Loser are team names;
    // WinScore and LosScore are the score texts read from the result
    // elements.
    League, Winner, Loser, WinScore, LosScore string
}

// main visits the goal.com live-scores page with an asynchronous colly
// collector and prints one Fixture for every competition container that
// matches the OnHTML selector.
func main() {
    fmt.Println("Starting programm")

    c := colly.NewCollector(
        colly.AllowedDomains("www.goal.com", "goal.com"),
        colly.Async(true),
    )

    // Class selectors (".a.b") are used rather than exact attribute matches
    // ("[class='a b']"): an exact match stops matching as soon as the element
    // carries an extra class or lists its classes in a different order.
    c.OnHTML("div.competition_competition_wbjsu", func(e *colly.HTMLElement) {
        // NOTE(review): per the colly documentation, ChildText concatenates
        // the text of every matching descendant, so a container holding
        // several matches yields joined values. Iterate per match row with
        // e.ForEach once the row selector is known — confirm against the
        // page markup.
        fixture := Fixture{
            League: e.ChildText("div.competition_name__093QA"),
            Winner: e.ChildText("h4.name_name__qsruk.name_winner_djsbB"),
            // :not keeps the winner's heading out of the loser selector.
            Loser:    e.ChildText("h4.name_name__qsruk:not(.name_winner_djsbB)"),
            WinScore: e.ChildText("p.result_team-a__jx1EM"),
            LosScore: e.ChildText("p.result_team-b_kNMbF"),
        }
        fmt.Printf("%+v\n", fixture)
    })

    c.OnError(func(r *colly.Response, err error) {
        fmt.Println("Request URL:", r.Request.URL, "failed with response:", r, "\nError:", err)
    })

    url := "https://www.goal.com/en-us/live-scores"
    // Visit reports failures that occur before the request is dispatched;
    // those never reach OnError, so they must be checked here.
    if err := c.Visit(url); err != nil {
        fmt.Println("Visit", url, "failed:", err)
        return
    }

    // The collector is asynchronous: block until queued requests finish.
    c.Wait()
    fmt.Println("End of Program")
}

I have tried changing the e.ChildText() calls multiple times, but I cannot get them to work. I understand that the class names need to be unique, but for now I just want to be able to scrape something from the site correctly.

0

There are 0 answers