I use Selenium WebDriver to crawl links, but it is very time-consuming, as there are hundreds of thousands of links to crawl, so I want a faster method to crawl all of the links.

How can I solve this using BeautifulSoup, GhostDriver, or any other method?

Below is my Selenium WebDriver code.

from bs4 import BeautifulSoup
import requests
import time
from selenium import webdriver

## Category link 1
BASE_URL = "https://dir.indiamart.com"
CATEGORY_URL = BASE_URL + "/indianexporters/pl_pvc.html"
# Path to the chromedriver executable.
CHROMEDRIVER_PATH = r'C:\webdriver\chromedriver.exe'
# Seconds to wait after each scroll for lazily-loaded results to render.
SCROLL_PAUSE = 0.2


def _category_links(url):
    """Fetch *url* with requests and return absolute hrefs for every
    category anchor (``a.slink``) inside the ``section.ctgry`` element."""
    resp = requests.get(url)
    soup = BeautifulSoup(resp.text, 'lxml')
    section = soup.find('section', class_='ctgry')
    return [BASE_URL + a['href'] for a in section.find_all('a', class_='slink')]


def _scroll_to_bottom(browser):
    """Scroll the current page until its height stops growing, so all
    lazily-loaded result rows are present in the DOM."""
    last_height = browser.execute_script("return document.body.scrollHeight")
    while True:
        # Scroll down to the bottom, then give the page time to load more.
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(SCROLL_PAUSE)
        new_height = browser.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height


def _company_links(body_html):
    """Parse the rendered body HTML and return the company hrefs found in
    ``div.r-cl.b-gry`` result rows."""
    soup = BeautifulSoup(body_html, 'lxml')
    return [row.find('a')['href'] for row in soup.find_all('div', class_='r-cl b-gry')]


def main():
    """Crawl every category page and print the company links it contains."""
    # Start ONE browser and reuse it for every category link. The original
    # script launched a fresh Chrome per link, which was the dominant cost
    # (and leaked the process whenever an exception skipped browser.close()).
    browser = webdriver.Chrome(CHROMEDRIVER_PATH)
    try:
        for link in _category_links(CATEGORY_URL):
            print(link + "--------------------------Category")
            try:
                browser.get(link)
                _scroll_to_bottom(browser)
                body_html = browser.find_element_by_tag_name("body").get_attribute('innerHTML')
                # Store and print the company links for this category page.
                result = _company_links(body_html)
                for href in result:
                    print(f"{href}")
            except Exception as exc:
                # Best-effort per link, but report the failure instead of
                # silently swallowing it like the original bare ``except: pass``.
                print(f"failed to crawl {link}: {exc}")
    finally:
        browser.quit()  # always release the browser, even on error


if __name__ == "__main__":
    main()

0 Answers