Background
I am working on a SeleniumBase script that is supposed to generate and save an output file after it finishes running or if it gets interrupted. This functionality works fine in my previous purely Selenium scripts, but I am encountering issues with this particular SeleniumBase script.
Issue
The script is intended to perform certain web operations and save the progress in a CSV file. It works as expected until an interruption occurs or the script finishes its execution. In these cases, no output file is generated or saved.
Here's an overview of the script:
- Login to a website using credentials.
- Navigate to a settings page for a online education course (based on urls listed in the bowhunter.csv file)
- Toggle edit mode, expand all segments of the course, and extract current required time for pages within the course that have videos on them
- Update that required time to be the same length of time as how long the video is
- Save progress in a CSV file at various points in the script.
- The script uses pandas for data handling and datetime for timestamping.
The script read information from a bowhunter.csv file to know which course to navigate to, what the video_lengths on the pages are and what to update the page to.
Problematic Part of the Script
The main concern is with the save_progress method, which should save the DataFrame df into a CSV file. The method seems to work intermittently and does not save the file in cases of interruption or successful completion.
def save_progress(self, df, suffix=""):
try:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_file = f'C:\\Users\\salim\\OneDrive\\Documents\\bowhunter_updated_{suffix}_{timestamp}.csv'
print(f"Attempting to save file to {output_file}...")
df.to_csv(output_file, index=False)
print(f"Progress saved to {output_file}")
return output_file
except Exception as e:
print(f"Error saving file: {e}")
Full Script:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from seleniumbase import BaseCase
import pandas as pd
from datetime import datetime
import time
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
class UpdateScript(BaseCase):
def login_to_website(self, url, email, password):
self.maximize_window()
self.open(url)
self.type("input#user_email", email)
self.type("input#user_password", password)
self.send_keys("input#user_password", "\n")
def toggle_edit_mode_on(self):
edit_mode_switch_selector = "span.switch-handle"
self.wait_for_element_present(edit_mode_switch_selector, timeout=30)
self.click(edit_mode_switch_selector)
def expand_all_segments(self):
try:
self.click("button#expand-all-segments")
except Exception as e:
print(f"An exception occurred while attempting to click 'Expand All': {e}")
def extract_current_time_from_modal(self):
try:
time_required_input = self.wait_for_element_visible("input#time-required", timeout=10)
current_time = time_required_input.get_attribute("value")
return "0" if current_time.strip() == "" else current_time
except Exception as e:
print(f"An exception occurred: {e}")
return None
def save_progress(self, df, suffix=""):
try:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_file = f'C:\\Users\\salim\\OneDrive\\Documents\\bowhunter_updated_{suffix}_{timestamp}.csv'
print(f"Attempting to save file to {output_file}...")
df.to_csv(output_file, index=False)
print(f"Progress saved to {output_file}")
return output_file
except Exception as e:
print(f"Error saving file: {e}")
def click_save_changes(self, df, course_id):
try:
save_changes_button = self.wait_for_element_visible("css=button.btn.btn-success.btn-md[ng-click='save()']", timeout=30)
self.click(save_changes_button)
self.wait_for_element_not_present("xpath=//span[contains(text(), 'Segment successfully updated!')]", timeout=30)
df.loc[df['course_id'] == course_id, 'Updated'] = "Yes"
print("Save Changes button clicked and course updated.")
except Exception as e:
print(f"An exception occurred while saving changes: {e}")
def test_update_course(self):
self.df = pd.read_csv(r'C:\Users\salim\OneDrive\Documents\bowhunter.csv')
self.df['Current Time'] = None
self.df['Updated'] = "No"
self.login_to_website("", "", "")
current_course_id = None
for index, row in self.df.iterrows():
if current_course_id != row['course_id']:
if current_course_id is not None:
self.click_save_changes(self.df, current_course_id)
current_course_id = row['course_id']
self.open(row['chunk_url'])
self.toggle_edit_mode_on()
self.expand_all_segments()
page_name = row['page_name']
print(f"Locating page element for: {page_name}")
specific_page_element = self.wait_for_element_present(f"//p[contains(@class, 'name') and contains(text(), '{page_name}')]", timeout=10)
settings_button = specific_page_element.find_element(By.XPATH, "following-sibling::div//button[@aria-label='Actions']")
self.execute_script("arguments[0].scrollIntoView({behavior: 'auto', block: 'center', inline: 'center'});", settings_button)
time.sleep(1)
ActionChains(self.driver).click(settings_button).perform()
# time.sleep(50)
properties_option = self.wait_for_element_visible("link=Properties", timeout=10)
ActionChains(self.driver).move_to_element(properties_option).click().perform()
self.wait_for_element_visible("input#time-required", timeout=30)
# time.sleep(50)
try:
new_time_required = round(float(row['Video_Length']))
time_required_selector = "input#time-required"
self.clear(time_required_selector)
self.type(time_required_selector, str(new_time_required))
print(f"Updated 'Time Required' to {new_time_required} seconds.")
except (ValueError, TypeError):
print(f"Invalid video length for row {index + 1}. Skipping update.")
continue
# time.sleep(500)
self.click("div.modal-content div:nth-of-type(3) button:nth-of-type(2)")
# time.sleep(50)
if index + 1 < len(self.df) and self.df.iloc[index + 1]['course_id'] != current_course_id:
print(f"Completed updates for course {current_course_id}.")
self.click_save_changes(self.df, current_course_id)
if current_course_id is not None:
self.click_save_changes(self.df, current_course_id)
self.save_progress(self.df)
if __name__ == "__main__":
try:
test_script = UpdateScript()
test_script.main()
except Exception as e:
print(f"An exception occurred: {e}")
test_script.save_progress(test_script.df, "interrupted")
finally:
test_script.save_progress(test_script.df)
Attempts to Resolve
Ensured that the file path and naming convention are correct. Verified that the DataFrame df is not empty or corrupted before saving. Added print statements to confirm that the method is being called.
Question
How can I ensure that the save_progress method consistently saves the output file, regardless of whether the script is interrupted or completes successfully? Are there any best practices or modifications I should consider implementing in the script to handle such cases effectively?
As seen in SeleniumBase/examples/boilerplates/base_test_case.py, use a custom
tearDown()
method so that you can call code regardless of whether or not a test passes or fails:So in your case, you would call
self.save_progress()
from yourtearDown()
method. And you can customize how you call it based on pass/fail status if you choose to do so.