I've written a script that takes a large Excel spreadsheet, strips away unwanted columns and any rows that contain zero values in particular columns, and then saves the result out to a CSV. The piece I'm stuck on is also removing rows that have missing (empty) cells. The way I was trying this was:
for each_row in row_list:
    if not all(map(len, each_row)):
        continue
    else:
        UICData.append(row_list)
But this isn't working correctly; I'm getting this error:
File "/Users/kenmarold/PycharmProjects/sweetCrude/Work/sweetCrude.py",
line 56, in PrepareRawData
if not all(map(len, each_row)) :
TypeError: 'float' object is not iterable
I'm not exactly sure how to resolve this. What's the way forward? I've also attached the full script below.
#!/usr/bin/env python3
import os
import sqlite3
import csv
import unicodecsv
from datetime import date
from xlrd import open_workbook, xldate_as_tuple
from xlwt import Workbook
orig_xls = 'data/all_uic_wells_jun_2016.xls'
temp_xls = 'data/temp.xls'
new_csv = 'data/gh_ready_uic_well_data.csv'
temp_csv = 'data/temp.csv'
input_worksheet_index = 0 # XLS Sheet Number
output_workbook = Workbook()
output_worksheet = output_workbook.add_sheet('Sweet Crude')
lat_col_index = 13
long_col_index = 14
#### SELECT AND FORMAT DATA
def PrepareRawData(inputFile, tempXLSFile, tempCSVFile, outputFile):
    # 0 = API#           # 7 = Approval Date
    # 1 = Operator       # 13 = Latitude
    # 2 = Operator ID    # 14 = Longitude
    # 3 = Well Type      # 15 = Zone
    keep_columns = [0, 1, 2, 3, 7, 13, 14, 15]

    with open_workbook(inputFile) as rawUICData:
        UICSheet = rawUICData.sheet_by_index(input_worksheet_index)

        UICData = []
        for each_row_index in range(1, UICSheet.nrows - 1, 1):
            row_list = []
            lat_num = UICSheet.cell_value(each_row_index, lat_col_index)    # Get Lat Values
            long_num = UICSheet.cell_value(each_row_index, long_col_index)  # Get Long Values

            if lat_num != 0.0 and long_num != 0.0:  # Find Zero Lat/Long Values
                for each_column_index in keep_columns:
                    cell_value = UICSheet.cell_value(each_row_index, each_column_index)
                    cell_type = UICSheet.cell_type(each_row_index, each_column_index)
                    if cell_type == 3:
                        date_cell = xldate_as_tuple(cell_value, rawUICData.datemode)
                        date_cell = date(*date_cell[0:3]).strftime('%m/%d/%Y')
                        row_list.append(date_cell)
                    else:
                        row_list.append(cell_value)

            for each_row in row_list:
                if not all(map(len, each_row)):
                    continue
                else:
                    UICData.append(row_list)

            # CreateDB(row_list)  # Send row data to Database

    for each_list_index, output_list in enumerate(UICData):
        for each_element_index, element in enumerate(output_list):
            output_worksheet.write(each_list_index, each_element_index, element)
    output_workbook.save(tempXLSFile)

    #### RUN XLS-CSV CONVERSION
    workbook = open_workbook(tempXLSFile)
    sheet = workbook.sheet_by_index(input_worksheet_index)
    fh = open(outputFile, 'wb')
    csv_out = unicodecsv.writer(fh, encoding='utf-8')
    for each_row_number in range(sheet.nrows):
        csv_out.writerow(sheet.row_values(each_row_number))
    fh.close()

    #### KILL TEMP FILES
    filesToRemove = [tempXLSFile]
    for each_file in filesToRemove:
        os.remove(each_file)

    print("Raw Data Conversion Ready for Grasshopper")
# ---------------------------------------------------
PrepareRawData(orig_xls, temp_xls, temp_csv, new_csv)
# ---------------------------------------------------
The traceback is telling you what's wrong: each_row in that loop is a single cell value taken from row_list (usually a float), and map(len, each_row) tries to iterate over that float, hence "'float' object is not iterable". A dirty patch is to keep the length check but run it over the string form of every cell in the row.
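Something along these lines, an untested sketch that replaces the whole "for each_row in row_list:" block inside your row loop:

# empty cells come back from xlrd as '', which stringifies to length 0;
# row_list is also empty when the lat/long check failed, so guard for that
if row_list and all(map(len, map(str, row_list))):
    UICData.append(row_list)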
EDIT: If the any/map/len approach still raises, I would try a different route to check whether a cell is empty.
Also I'm not sure why you are appending the entire row_list on every pass through that loop; in the snippets below the row is appended only once, after all of its cells have been checked.
Option 1:
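Compare the collected values against the empty string: xlrd gives you '' for a missing cell, so a row with any missing kept column will contain ''. A sketch (same placement as above, not tested against your sheet):

# keep the row only when none of the kept cells came back empty
if row_list and '' not in row_list:
    UICData.append(row_list)

If some cells can hold whitespace-only text, use all(str(cell).strip() for cell in row_list) as the test instead.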
Option 2:
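Ask xlrd for the cell types before building row_list and skip the row as soon as one of the kept columns is empty or blank. Again only a sketch, reusing the numeric type codes the script already relies on for dates (cell_type == 3); it would sit near the top of the "for each_row_index" loop:

# xlrd cell type codes: 0 = XL_CELL_EMPTY, 6 = XL_CELL_BLANK
row_types = [UICSheet.cell_type(each_row_index, c) for c in keep_columns]
if 0 in row_types or 6 in row_types:
    continue  # at least one kept column is missing for this row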