Thanks for coming by; I really appreciate your help. I'm trying to scrape simple Craigslist listings, but this code doesn't work: it returns an empty list ([]). Please help! The code follows below.
# Import packages
from robobrowser import RoboBrowser
import sys, codecs, locale
import pandas as pd
# Module-level RoboBrowser instance used by getTrips to open and query pages;
# it keeps navigation history and parses with the 'html.parser' backend.
browser = RoboBrowser(history=True, parser='html.parser')
# Function that scrapes listing data, following pagination links.
def getTrips(website):
    """Collect listing records from ``website`` and every following page.

    Each page is opened with the module-level ``browser``; every element
    with class ``result-info`` becomes one record.

    Args:
        website: URL of the first search-results page.

    Returns:
        A list of dicts with the keys ``"title"``, ``"url"`` and
        ``"website"`` (the URL of the page the record was found on).
        The list is empty when no page contains a matching element.
    """
    data = []
    visited = set()
    page_url = website

    # Iterate instead of recursing so that the rows of *every* page end up
    # in ``data``; the recursive version discarded the nested call's result.
    while page_url and page_url not in visited:
        visited.add(page_url)  # guard against a "next" link that loops back
        browser.open(page_url)

        for trip in browser.find_all(class_='result-info'):
            data.append({
                "title": get_title(trip),
                "url": get_url(trip),
                "website": page_url,
            })

        next_page = browser.get_link('next >')
        href = next_page.get('href') if next_page else None
        # The href may be relative; resolve it against the current page.
        page_url = browser._build_url(href) if href else None

    return data
def get_title(trip):
    """Return the text of the listing's title link.

    Args:
        trip: Element exposing ``find(class_=...)``, e.g. one of the
            ``result-info`` elements collected by ``getTrips``.

    Returns:
        The ``text`` of the element with class ``result-title hdrlnk``,
        or the string ``"Title not found"`` when no such element exists.
    """
    # Look the element up once and reuse it instead of searching twice.
    title_link = trip.find(class_='result-title hdrlnk')
    if title_link is None:
        return "Title not found"
    return title_link.text
def get_url(trip):
    """Return the ``href`` of the first link inside a listing element.

    Args:
        trip: Element exposing ``find(...)``, e.g. one of the
            ``result-info`` elements collected by ``getTrips``.

    Returns:
        The ``href`` attribute of the first ``<a>`` element found in
        ``trip``, or the string ``"URL not found"`` when there is no link
        or the link has no ``href``.
    """
    # Search ``trip`` itself for the anchor. The previous version looked for
    # a nested 'result-info' element (``trip`` is presumably already that
    # element) and then read from an undefined name ``item``.
    link = trip.find('a')
    if link is None:
        return "URL not found"
    href = link.get('href')
    return href if href else "URL not found"
# Build the aggregated list (final output: a list of dictionaries called "total").
total = []
total.extend(getTrips('https://newyork.craigslist.org/search/bbb?query=photographer&sort=rel'))

# Print the collected records (for testing purposes).
print(total)

# Export the data to a CSV file.
df = pd.DataFrame(total)
df.to_csv('photographer_data.csv', index=False)