In the code , I'm doing the playstore scraping of the similar apps through the inspect-css element or tag. Which parse gives the two urls for the given website which refer to the similar apps and dev apps. In the similar apps the apps url are displayed untill 50 . On the furthrt debug i saw that the page scrolling is not done and the apps present over there are not loaded .. for that i have used the script to scroll the page to the end of the page but there the wait is not working and it is only printing the 50 app details.....Here is the code
import datetime
from urllib.parse import urlparse, parse_qs
import scrapy
from scrapy_selenium import SeleniumRequest
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
class AndroidCrawlerSpider(scrapy.Spider):
name = "android_crawler"
allowed_domains = ["play.google.com"]
start_urls = ["https://play.google.com/store/apps/details?id=com.facebook.katana&hl=en&gl=US"]
def __init__(self, *args, **kwargs):
super(AndroidCrawlerSpider, self).__init__(*args, **kwargs)
self.dict = {}
def closed(self, reason):
print('Spider is closing now')
def parse(self, response):
self.dict['App'] = {}
parsed_url = urlparse(response.url)
query_params = parse_qs(parsed_url.query)
if 'id' in query_params:
self.dict['App'] = query_params['id'][0]
yield self.dict
links = response.css('.WpHeLc::attr(href)').getall()
for link in links:
if link.startswith('/store/apps/collection/cluster'):
url = 'https://play.google.com' + link
print("parse_data", url," ## ",datetime.datetime.now())
yield SeleniumRequest(
url=url,
# script='window.scrollTo(0, document.body.scrollHeight);',
callback=self.parse_inner_page_scrapy,
wait_time= 3,
wait_until=EC.element_to_be_clickable((By.CSS_SELECTOR,'div.fUEl2e'))
)
def parse_inner_page_scrapy(self, response):
print("parse_inner_page_scrapy", response," ## ",datetime.datetime.now())
target_div = response.css('div.fUEl2e')
links = target_div.xpath('.//a/@href').extract()
for link in links:
if link.startswith('/store/apps/details'):
url = 'https://play.google.com' + link
print(url)
# yield scrapy.Request(url=url, callback=self.parse)
I want all the similar apps present in the browser should be printed . which means it should scroll to the bottom of the page completely and load the data only using scrapy-selenium