Below is my code utilizing pygooglenews to scrape Google News:
def get_titles1(search):
news = [] # list of articles that will have NLP applied to it
search = gn.search(search, when = '2y') #when: duration; 2y = 2 years, 6m = 6 months, etc.
search_items = search['entries'] # list of articles retrieved before NLP
for item in search_items: # go through all of the items in search_items
try:
url = item.link
article = Article(url, config=config)
article.download()
article.parse()
authors = ", ".join(author for author in article.authors)
title = article.title
date = article.publish_date
text = article.text
image = article.top_image
videos = article.movies
url = article.url
article.nlp()
keywords = article.keywords
print("\n")
print(item.link)
print(f"Keywords: {keywords}")
print(f"Summary: {article.summary}")
report = {'title':article.title, 'link':article.link, 'published':article.published}
news.append(report)
except:
pass
return news
data = get_titles1('Los Angeles')
df = pd.DataFrame(data)
print(df)
Although I everything works when with the function and when I call it using the data variable, when trying to make a DataFrame, the result is empty. This is strange given that in the function, I created the result dictionary, which 'title', 'link', and 'published' all acting as column headers.
Does anybody know what is the issue? Please let me know as I am a beginner with Python. Thank you!