Compare row values in excel and store time value in a variable

20 views Asked by At

I'm writing a script to open an excel file and check a specific "energy" column for row values equal to zero. If it is equal to zero, store the user ID and total duration values (which is in a different column, same row). Then, check the next row if "energy value" equals zero, if not equal to zero, then check the user ID. If it is the same as previous row, then keep the value of the total duration. If next row energy value is equal to zero, then check the user ID, if it is the same as previous row, then update the total duration value to include the total duration value in this row. If user ID is not the same as previous row then proceed to next row.

import datetime
import openpyxl
#import pandas

workbook = ("filename.xlsx")
workbook_op = openpyxl.load_workbook(workbook)
workbook_s = workbook_op.active

print("Column Names:")
for cell in workbook_s\[1\]:  # Assuming column names are in the first row
print(cell.value)

    user_id = None
    temp_total_duration = 0
    t_excluded_values = {}
    next_row = None  # Initialize variable to store next row data your text`
    
    for row in workbook_s.iter_rows(min_row=2):  # Skip the header row
        current_user_id = row[18].value # Assuming user ID is in the 17th column
        current_energy = row[10].value  # Assuming energy is in the 9th column
        current_duration = row[8].value  # Assuming duration is in the 7th column
    
        # Peek at the next row (if available)
        try:
            next_row = next(workbook_s.iter_rows(min_row=row[0].row + 1, max_rows=1))
            next_energy = next_row[2].value
            next_user_id = next_row[0].value
        except StopIteration:
            next_row = None
            next_energy = None
            next_user_id = None
    
        if current_energy == 0:
            user_id = current_user_id
            temp_total_duration = 0
        elif next_energy != 0:  # Check next row energy value
            if current_user_id == user_id:
                t_excluded_values[user_id] = temp_total_duration
                temp_total_duration += current_duration
            else:
                user_id = current_user_id
                temp_total_duration = current_duration
        else:  # Next row energy is zero, check next row user ID
            if current_user_id == next_user_id:
                temp_total_duration += current_duration
            else:  # Reached end of user data block
                user_id = None
                temp_total_duration = 0
    
    # Update the last user's T-excluded value (if any)
    if user_id:
        t_excluded_values[user_id] = temp_total_duration
    
    # Print the collected T-excluded values (optional)
    for user, duration in t_excluded_values.items():
        print(f"User ID: {user}, T-excluded Duration: {duration}")

# Example usage:

process_excel_file("your_excel_file.xlsx")
0

There are 0 answers