import pandas as pd # Pandas is used for data manipulation and analysis
import datetime # Manipulating date and times
import requests # Allows for making HTTPS requests to websites
from requests.exceptions import ConnectionError
from bs4 import BeautifulSoup # Beautiful Soup used for web scraping, particularly HTML and XML documents
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def web_content_div(web_content, class_path):
    """Return the text of every <span> inside the first matching <div>.

    Args:
        web_content: Parsed document exposing ``find_all`` (in this file, the
            BeautifulSoup object built from the rendered page source).
        class_path: Value of the ``class`` attribute used to select the div.
            NOTE(review): per the Beautiful Soup docs a space-separated value
            is compared against the full class attribute string, so the order
            of the class names matters -- confirm against the live markup.

    Returns:
        A list with the text of each span found in the first matching div, or
        an empty list when no div matches.
    """
    # find_all is the current spelling; findAll is a deprecated alias.
    matching_divs = web_content.find_all('div', {"class": class_path})
    if not matching_divs:
        return []
    # Only the first matching div is inspected, as before.
    return [span.get_text() for span in matching_divs[0].find_all('span')]
def real_time_price(stock_code):
    """Fetch the displayed price and price change for a ticker from Yahoo Finance.

    The quote page is loaded in headless Chrome, the rendered HTML is parsed
    with BeautifulSoup, and the span texts inside the quote div are read via
    ``web_content_div``.

    Args:
        stock_code: Ticker symbol appended to the Yahoo Finance quote URL
            (e.g. ``'BRK-B'``).

    Returns:
        A ``(price, change)`` tuple holding the first two span texts, or
        ``([], [])`` when fewer than two spans were found or a
        ``ConnectionError`` was raised.
    """
    # Imported locally so this function carries its own Selenium dependencies.
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    class_path = 'bottom svelte-okyrr7'
    # CSS form of the same classes, e.g. "div.bottom.svelte-okyrr7".
    css_selector = 'div.' + '.'.join(class_path.split())
    url = 'https://finance.yahoo.com/quote/' + stock_code + '?.tsrc=fin-srch'

    chrome_options = Options()
    chrome_options.add_argument("--headless")

    driver = None
    try:
        driver = webdriver.Chrome(options=chrome_options)
        driver.get(url)
        # An implicit wait only applies to element lookups, so setting it
        # before reading page_source does not wait for anything. Wait
        # explicitly for the quote div to be rendered instead.
        try:
            WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, css_selector))
            )
        except TimeoutException:
            # Div never appeared; fall through and parse whatever was rendered.
            pass
        html = driver.page_source
    except ConnectionError:
        # NOTE(review): this is requests' ConnectionError; Selenium reports
        # navigation failures as WebDriverException, which still propagates.
        return [], []
    finally:
        # quit() ends the whole browser session, even when an error occurred.
        if driver is not None:
            driver.quit()

    web_content = BeautifulSoup(html, 'lxml')
    texts = web_content_div(web_content, class_path)
    # Both a price span and a change span are needed to unpack safely.
    if len(texts) >= 2:
        return texts[0], texts[1]
    return [], []
# Tickers to look up; each one is fetched and printed as a (price, change) tuple.
Stock = ["BRK-B"]
for stock_code in Stock:
    print(real_time_price(stock_code))
I'm making a program that gives stock updates in real time. I made this starter program to show the stock price from Yahoo Finance for Berkshire Hathaway. The spans containing the price and the price change lie in the parent div with class `bottom svelte-okyrr7`. However, it seems that this line:
web_content_div = web_content.findAll('div', {"class": class_path})
Is not able to locate that div at all. What am I doing wrong?
I tried directly using the div class under which both spans exist, but that didn't work either. I am not sure how to make the findAll() method actually find the div and extract the spans from it. Even Selenium isn't helping (unless I'm using it incorrectly).