I have problem in scraping other languages keyword by using the scholarly or paperscraper.
I have below code (code link):
!pip install scholarly
And :
import scholarly
import time
import random
import csv
from openpyxl import Workbook
def slow_scrape_scholarly(keyword, num_results):
scholarly.scholarly.set_timeout(30)
results = []
# Search for publications based on the keyword
search_query = scholarly.scholarly.search_keyword(keyword)
for i in range(num_results):
try:
# Get the first result from the search query
pub = next(search_query)
bib = pub.get('bib')
# Check if the 'title' key is present
if bib is not None:
# Extract the author name and publication information
print(bib['title'])
pub_url = pub.get('pub_url')
print(pub_url)
author = bib['author']
# Convert list of author names to comma-separated string
author_str = ', '.join(author) if isinstance(author, list) else author
publication = bib['title']
pub_url = pub.get('pub_url')
result = {
'author': author_str,
'publication': publication,
'pub_url': pub_url
}
results.append(result)
# Write current result to a CSV file
with open('scholarly_results.csv', mode='a', newline='') as file:
writer = csv.writer(file)
writer.writerow([result['author'], result['publication'], result['pub_url']])
# Write current result to an Excel file
wb = Workbook()
ws = wb.active
ws.title = 'Scholarly Results'
ws.append(['Author', 'Publication', 'Publication URL'])
for r in results:
ws.append([r['author'], r['publication'], r['pub_url']])
wb.save('scholarly_results.xlsx')
else:
print("Skipping result due to missing 'title' key")
except StopIteration:
# If there are no more results, break out of the loop
break
# Simulate a human-like delay between requests
delay = random.uniform(10.1, 25.0)
print(f"Waiting for {delay:.2f} seconds before the next request...")
time.sleep(delay)
return results
# Example usage
keyword = 'خشونت علیه زنان'
def get_total_results(keyword):
search_query = scholarly.scholarly.search_pubs(keyword)
total_results = search_query.total_results
return total_results
# Example usage
#keyword = 'violence against women'
def get_total_results(keyword):
search_query = scholarly.scholarly.search_keyword(keyword)
total_results = search_query.total_results
print(f"Total number for the {keyword} search is {total_results}")
return total_results
num_results = get_total_results(keyword)
results = slow_scrape_scholarly(keyword, num_results)
try:
num_results = get_total_results(keyword)
results = slow_scrape_scholarly(keyword, num_results)
except:
print("Error getting total results")
And I get the below error:
------------------------------------------ AttributeError Traceback (most recent call last) <ipython-input-3-926f6369e88d> in
<cell line: 80>()
78 print(f"Total number for the {keyword} search is {total_results}")
79 return total_results
---> 80 num_results = get_total_results(keyword)
81 results = slow_scrape_scholarly(keyword, num_results)
82
<ipython-input-3-926f6369e88d> in get_total_results(keyword)
75 def get_total_results(keyword):
76 search_query = scholarly.scholarly.search_keyword(keyword)
---> 77 total_results = search_query.total_results
78 print(f"Total number for the {keyword} search is {total_results}")
79 return total_results
AttributeError: 'generator' object has no attribute 'total_results'