I'm writing a program that parses websites (takes links from an Excel file) and then enters the status of loading each link into Excel.
I used the xlwings library because it allows you to make changes to the file without overwriting it (that is, smart tables, external links, etc. are saved, unlike openpyxl)
I encountered a problem that xlwings cannot save changes to the input file, but at the same time safely saves the result to a new file.
This is the error it gives:
Error: Exception in thread Thread-1 (start_download): pywintypes.com_error: (-2147352567, 'Exception occurred.', (0, 'Microsoft Excel', "Неможливо отримати доступ до документа ''Lifecell – копія.xlsx'', призначеного лише для читання.", 'xlmain11.chm', 0, -2146827284), None)
That is, the error is that the file is open only for reading, I don’t know what this is connected with, so I tried creating a new file, deleting the input one and changing the name of the new file. But the code gave an error that the file was occupied by other processes and could not be deleted.
To summarize, I can say that some process does not allow any manipulation with the file after an attempt to change the contents.
!In other code, which is similar and also makes changes to the file, there are no errors, everything works okay.
Program code with saving problem:
# Function to start the download in a thread
def start_download_thread():
download_thread = Thread(target=start_download)
download_thread.start()
def start_download():
global excel_path
if not excel_path:
print("Шлях до файлу Excel не визначений.")
return
try:
start_button.config(state="disabled")
start_time = time.time()
selected_sheet = sheet_combo.get()
url_column = description_combo.get()
clip_id_column = clip_id_combo.get()
access_column = access_combo.get()
app = xw.App(visible=False)
app.display_alerts = False
book = app.books.open(excel_path, read_only=False)
sheet = book.sheets[selected_sheet]
df = sheet.range('A1').options(pd.DataFrame, expand='table', index=False).value
download_statuses = ['Недоступно'] * len(df)
base_name = os.path.basename(excel_path)
file_name_without_extension = os.path.splitext(base_name)[0]
folder_name = f'creatives_{file_name_without_extension}'
folder_path = os.path.join(os.path.dirname(excel_path), folder_name)
ensure_directory_exists(folder_path)
if brand_sorting_checkvar.get():
brand_column = brand_column_combo.get()
brands = df[brand_column].unique()
folders = {brand: os.path.join(folder_path, str(brand)) for brand in brands}
for brand_folder in folders.values():
ensure_directory_exists(brand_folder)
else:
folders = {None: folder_path}
with ThreadPoolExecutor(max_workers=15) as executor:
futures = []
for index, row in df.iterrows():
if row.get(access_column) == 'Доступно':
brand = row.get(brand_column) if brand_sorting_checkvar.get() else None
folder = folders.get(brand, folder_path)
future = executor.submit(process_row, index, row, folder, url_column, clip_id_column, access_column)
futures.append(future)
for future in futures:
index, result = future.result()
download_statuses[index] = result
df['Завантаження'] = download_statuses
excel_columns = [cell.value for cell in sheet.range('1:1').expand('right')]
col_index = excel_columns.index('Завантаження') + 1 if 'Завантаження' in excel_columns else len(excel_columns) + 1
if 'Завантаження' not in excel_columns:
sheet.range((1, col_index)).value = 'Завантаження'
statuses_values = [[status] for status in download_statuses]
sheet.range((2, col_index), (1 + len(download_statuses), col_index)).value = statuses_values
book.save(excel_path) # Указываем путь явно при сохранении
end_time = time.time()
elapsed_time = (end_time - start_time) / 60
execution_time_label.config(text=f"Час виконання: {elapsed_time:.2f} хвилин")
except Exception as e:
print("Error: ", e)
finally:
start_button.config(state="normal")
if book:
book.close()
app.quit()
Code of a program that works fine:
def save_with_hyperlinks_xlwings(df, file_path, sheet_name, url_column):
app = xw.App(visible=False) # Launch Excel in the background
app.display_alerts = False # Disable Excel alerts to prevent modal dialogs
try:
book = app.books.open(file_path)
sheet = book.sheets[sheet_name]
# Determine the column indices for URL and 'Доступ' (Availability)
# url_col_idx = df.columns.get_loc(url_column) + 1
status_column_name = 'Доступ'
excel_columns = sheet.range('1:1').expand('right').value
status_col_idx = excel_columns.index(status_column_name) + 1 if status_column_name in excel_columns else len(excel_columns) + 1
# Insert the status column header if it does not exist
if status_column_name not in excel_columns:
sheet.range(1, status_col_idx).value = status_column_name
# Bulk update for statuses
statuses = df[status_column_name].tolist()
sheet.range((2, status_col_idx), (1 + len(statuses), status_col_idx)).value = [[status] for status in statuses]
except Exception as e:
print(f"Помилка: {e}")
finally:
if book:
try:
book.save()
book.close()
except Exception as e:
print(f"Помилка при закритті та збереженні книги: {e}")
app.quit()
I checked the exel_path, the data that is saved to the file, I debugged each variable - everything was ok. Perhaps the problem is in the use of pandas or in multithreading, but I cannot fully understand what the problem is. ChatGPT 4 doesn't help much either, it doesn't provide a solution