Copy Paste not working with headless browser in python selenium

1.6k views Asked by At

I am using Selenium with Python to click a button on a webpage. This copies data in CSV format onto the clipboard. I then use the clipboard data to create an array, which is used further in the program. Everything works fine until I start the webdriver in headless mode. Is there any solution to this problem? Can this entire code be written without Selenium? I am open to ideas and improvements to my code.

    # Open the screener page in headless Firefox, wait for the result marker
    # to appear, press the page's "copy" button and turn the clipboard
    # contents into arrFinalDataList. intDelaySeconds and intMaxAttempt are
    # expected to be defined before this point.
    import time

    try:
        objFFOptions = Options()
        objFFOptions.add_argument('--headless')
        # NOTE: use webdriver.Firefox() without options for a visible window.
        objFFWebDriver = webdriver.Firefox(options=objFFOptions)  # start hidden
    except Exception as objError:
        # The driver was never created, so there is nothing to quit here;
        # touching objFFWebDriver would raise NameError and hide the cause.
        print("Error in initiating the Firefox webdriver")
        print("\t", objError)
        quit()

    def _find_marker_element(objDriver):
        """Return the first marker element present on the page, else False.

        Joining two expected_conditions with ``or`` only ever evaluates the
        first one (a callable is always truthy), so both locators are probed
        explicitly on every poll instead.
        """
        arrFound = (objDriver.find_elements(By.ID, 'DataTables_Table_0_info')
                    or objDriver.find_elements(By.CLASS_NAME, 'dataTables_empty'))
        return arrFound[0] if arrFound else False

    # The finally clause guarantees the browser is closed on every exit path,
    # including quit() and unexpected exceptions.
    try:
        try:
            objFFWebDriver.get("https://chartink.com/screener/90dis")
        except Exception as objError:
            print("Error in opening the webpage")
            print("\t", objError)
            quit()

        # loop for waiting before query data loads
        intAttemptCounter = 0
        boolStockDataFetched = False

        while True:
            intAttemptCounter = intAttemptCounter + 1
            print("\tFetching attempt ", intAttemptCounter)

            try:
                # The timeout grows with every attempt.
                objFilterMessageElement = WebDriverWait(
                    objFFWebDriver, intDelaySeconds * intAttemptCounter
                ).until(_find_marker_element)
                # Read the text once so every check below sees the same value.
                strFilterMessage = objFilterMessageElement.text
            except TimeoutException:
                print("\tTimeout Exception")
                if intAttemptCounter <= intMaxAttempt:
                    continue
                break

            print("\tEither of the two marker elements found")

            if re.search(r"Filtered\s+[0-9]+\s+stocks\s+\([0-9]+\s+to\s+[0-9]+\)",
                         strFilterMessage) is not None:
                print("\t", strFilterMessage)

                try:
                    # click copy button
                    objFFWebDriver.find_element(
                        By.XPATH,
                        "//*[@class='btn btn-default buttons-copy buttons-html5 btn-primary']",
                    ).click()
                except NoSuchElementException:
                    # Without a click the clipboard holds no fresh data, so
                    # never fall through to the paste below.
                    if intAttemptCounter > intMaxAttempt:
                        break
                    time.sleep(intDelaySeconds)
                    continue

                # store the query result from clipboard to a string
                # NOTE(review): the clipboard is reportedly not populated when
                # the browser runs headless; an empty result is treated as a
                # failure below rather than as success.
                strCSVData = pyperclip.paste()
                pyperclip.copy("")

                # create array from the csv string containing stock data:
                # skip the first three rows and keep the third column,
                # ignoring rows too short to have one (e.g. blank lines).
                arrDataList = list(csv.reader(StringIO(strCSVData), delimiter='\t'))
                arrFinalDataList = [arrDataRecord[2]
                                    for arrDataRecord in arrDataList[3:]
                                    if len(arrDataRecord) > 2]

                boolStockDataFetched = bool(arrFinalDataList)
                break

            if strFilterMessage == "No stocks filtered in the Scan":
                print("\t", strFilterMessage)
                break

            # Marker present but its text is not one of the expected messages
            # yet: retry after a pause, and stop once attempts are exhausted
            # instead of spinning forever.
            if intAttemptCounter > intMaxAttempt:
                break
            time.sleep(intDelaySeconds)

        if not boolStockDataFetched:
            print("Error in fetching records or no records fetched")
    finally:
        objFFWebDriver.quit()
1

There is 1 answer

7
AKX On BEST ANSWER

You probably can't copy-paste in a headless browser. You could, instead, read the data from the visual table.

However, you don't need Selenium at all. If you use your browser's inspector to look at the requests the page makes, you can formulate something that performs a similar sequence, like so:

import re
from pprint import pprint
import requests

# Fetch the screener results directly over HTTP: load the page to obtain a
# session cookie and CSRF token, then POST the scan clause to the endpoint
# the page itself calls.
sess = requests.Session()
sess.headers["User-Agent"] = "Mozilla/5.0 Safari/537.36"

# Do initial GET request, grab CSRF token
# A timeout keeps the script from hanging forever on a stalled connection.
resp = sess.get("https://chartink.com/screener/90dis", timeout=30)
resp.raise_for_status()
csrf_token_m = re.search(r'<meta name="csrf-token" content="(.+?)" />', resp.text)
if csrf_token_m is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise RuntimeError("CSRF token meta tag not found in the screener page")
csrf_token = csrf_token_m.group(1)

# Do data query
resp = sess.post(
    "https://chartink.com/screener/process",
    data={
        "scan_clause": "( {cash} ( latest count( 90, 1 where latest ha-low > latest ichimoku cloud top( 9 , 26 , 52 ) ) = 90 ) )",
    },
    headers={
        "Referer": "https://chartink.com/screener/90dis",
        "x-csrf-token": csrf_token,
        "x-requested-with": "XMLHttpRequest",
    },
    timeout=30,
)
resp.raise_for_status()
data = resp.json()
pprint(data)

This prints out e.g.

{'data': [{'bsecode': None,
           'close': 18389.5,
           'name': 'NIFTY100',
           'nsecode': 'NIFTY100',
           'per_chg': 1.28,
           'sr': 1,
           'volume': 0},
          {'bsecode': '532978',
           'close': 18273.8,
           'name': 'Bajaj Finserv Limited',
           'nsecode': 'BAJAJFINSV',
           'per_chg': 2.25,
           'sr': 2,
           'volume': 207802},
          ...