I am writing a script to scrape data from the website "https://pfchangsmexico.com.mx/ubicaciones/", from which I want the latitude and longitude values of each restaurant. [image: enter image description here] In the image I have highlighted the latitude and longitude values, but I don't know how to write an XPath or a CSS selector for them. Please tell me how to write the XPath or CSS selector.
import scrapy
import re
from scrapy_selenium import SeleniumRequest
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
class BistroSpider(scrapy.Spider):
    """Spider for the restaurant listings on pfchangsmexico.com.mx.

    Starts at the site's index page, follows the first ``div a`` link found
    there via a ``SeleniumRequest``, and prints the restaurant names,
    addresses, postcodes and phone numbers scraped from the resulting page,
    each followed by the number of entries found.
    """

    name = "bistro"
    allowed_domains = ["pfchangsmexico.com.mx"]
    start_urls = ["https://pfchangsmexico.com.mx/index.html"]

    # Strips any remaining HTML tag from a serialized <span> element.
    _TAG_RE = re.compile(r'<.*?>')

    # Disabled by the original author: a dedicated Chrome driver owned by the
    # spider (only needed for the disabled iframe experiments further down).
    # def __init__(self):
    #     super(BistroSpider, self).__init__()
    #     self.selenium = webdriver.Chrome()

    def parse(self, response):
        """Follow the first ``div a`` link on the start page.

        Args:
            response: Scrapy response for one of ``start_urls``.

        Yields:
            A ``SeleniumRequest`` for the linked page, handled by
            ``parse_info``. Nothing is yielded when no link is found.
        """
        location_page = response.css('div a::attr(href)').get()
        if location_page is None:
            # Without this guard a missing link would be passed on as
            # url=None and fail inside the request constructor.
            self.logger.warning("No location link found on %s", response.url)
            return
        # The href may be relative; requests need an absolute URL, so resolve
        # it against the URL of the page it was found on.
        yield SeleniumRequest(
            url=response.urljoin(location_page),
            callback=self.parse_info,
        )

    def parse_info(self, response):
        """Print the restaurant details found on the locations page.

        Each field is extracted with its own XPath and printed as a list,
        followed by the length of that list.

        Args:
            response: Response for the page requested from ``parse``.
        """
        # Disabled by the original author: wait for the map iframe and
        # switch the Selenium driver into it.
        # iframe_locator = (By.XPATH, '//div/iframe')
        # WebDriverWait(self.selenium, 10).until(
        #     EC.frame_to_be_available_and_switch_to_it(iframe_locator))

        res_names = response.xpath('//div/p[1]/span[1]/text()').getall()
        # The first match is dropped; presumably it is not a restaurant
        # name -- TODO confirm against the page markup.
        res_names = res_names[1:]
        print(res_names)
        print(len(res_names))

        res_address = response.xpath('//div/p[1]/span[2]/text()').getall()
        print(res_address)
        print(len(res_address))

        # Second pass over the addresses using the serialized <span> elements
        # instead of their text nodes: the "Servicio a domicilio:" label span
        # is skipped and the markup is stripped from the rest.
        addresses = []
        for raw_span in response.xpath('//div/p/span[2]').getall():
            if raw_span == '<span style="font-family: Avenir;">Servicio a domicilio:</span>':
                continue
            without_closing = raw_span.replace('</span>', '')
            addresses.append(self._TAG_RE.sub('', without_closing))
        print(addresses)
        print(len(addresses))

        postcode = response.xpath('//div/p[1]/span[3]/text()').getall()
        print(postcode)
        print(len(postcode))

        phoneno = response.xpath('//div/p[4]/a[1]/span[1]/text()').getall()
        print(phoneno)
        print(len(phoneno))

        # Original author's note: Guadalajara (3) uses a third path, which is
        # why 27 phone numbers appear in the output:
        # //*[@id="guadalajara"]/div[2]/div[5]/div[1]/div/div/div[2]/div/p[3]/a[1]/span

        # Disabled by the original author: dump the iframe's inner HTML.
        # iframe_element = self.selenium.find_element(By.XPATH, '//div/iframe')
        # iframe_content = iframe_element.get_attribute("innerHTML")
        # print("Iframe Content:")
        # print(iframe_content)
```
This is my code. In addition to the fields above, I also want the latitude and longitude values of each restaurant.
Each iframe has a `src` attribute that is effectively a hyperlink to Google Maps. That attribute contains the latitude and longitude.
These can be extracted as follows:
Output:
...and so on