I have created an API that reads a file we recieve and then gives me back the information I need. I now need to create a new .xlsx file with this new data I have recieved.
Currently it's working, but it is really not optimized. Creating a file with 4000 lines pretty much instant, but we will need to create files with up to hundreds of thousands of lines. At the moment, creating a file with 140,000 lines took 14 minutes.
I am not very experienced in algoritm optimization so i'm not sure where look for optimizing.
This is a project written with a Django backend. Here is the views.py function
def vannmiljo(request):
survey_id = request.GET.get('surveyID', None)
if not survey_id:
return JsonResponse({'error': 'SurveyID parameter is missing'}, status=400)
instanceID = request.GET.get('m_instanceID', None)
if not instanceID:
error = "No datafile"
return JsonResponse({'error': error}, status=400)
instance = SurveyInstance.objects.get(id=instanceID)
if not instance.survey_file:
error = "No datafile"
return JsonResponse({'error': error}, status=400)
else:
excel_file_url = instance.get_survey_file_url
response = requests.get(excel_file_url)
if not response.status_code == 200:
print(f'Failed to download Excel file: {response.status_code}')
excel_data = BytesIO(response.content)
try:
dfs = pd.read_excel(excel_data, engine="openpyxl")
start_row = dfs[dfs.eq('Sal.').any(axis=1)].index[0]
dfs = dfs.iloc[start_row:]
dfs.reset_index(drop=True, inplace=True)
dfs.columns = dfs.iloc[0]
dfs = dfs[1:]
data_list = []
columns_to_keep = [col for col in ["Sal.", "Cond.", "Temp", "Ox %", "mg/l", "T (FTU)", "Density", "S. vel.", "Depth(dp)", "Date", "Time"] if col in dfs.columns]
# Create a new DataFrame with only the necessary columns
filtered_dfs = dfs[columns_to_keep].copy()
# Rename the columns to the desired names
column_renames = {
"Sal.": "sal",
"Cond.": "cond",
"Temp": "temp",
"Ox %": "ox",
"mg/l": "mg_l",
"T (FTU)": "t_ftu",
"Density": "density",
"S. vel.": "s_vel",
"Depth(dp)": "depth",
"Date": "date",
"Time": "time"
}
filtered_dfs.rename(columns=column_renames, inplace=True)
# Convert DataFrame to a list of dictionaries (if needed)
data_list = filtered_dfs.to_dict('records')
survey_instance = get_object_or_404(Survey, SurveyID=survey_id)
survey_details = survey_instance.addInfoID_fk
approver = survey_details.get_approver
approver_organization = approver.organisation
approver_org_name = approver_organization.org_name if approver_organization else None
oppdragsgiver = approver_org_name
oppdragstaker = approver_org_name
data = {
'oppdragsgiver': oppdragsgiver,
'oppdragstaker': oppdragstaker,
'surveyID': survey_id,
'data_list': data_list,
}
print(data)
except Exception as e:
# Catch any exception and return a response indicating the issue
return HttpResponse(f"Error: {str(e)}", status=500)
def stream_data():
yield '{'
yield f'"oppdragsgiver": "{data["oppdragsgiver"]}",'
yield f'"oppdragstaker": "{data["oppdragstaker"]}",'
yield f'"surveyID": "{data["surveyID"]}",'
yield '"data_list": ['
first = True
for item in data['data_list']:
# Convert datetime and time objects to strings
for key, value in item.items():
if isinstance(value, (datetime.datetime, datetime.time)):
item[key] = value.isoformat()
# Add a comma before each item except the first one
if first:
first = False
else:
yield ','
yield json.dumps(item)
yield ']}'
response = StreamingHttpResponse(stream_data(), content_type='application/json')
return response
Here is the frontend JS code
function createDataToSend(data, oppdragsgiver, oppdragstaker) {
const parameter_mapping = {
'cond': 'KOND',
'density': 'DENS',
's_vel': 'SOUNDVEL',
'sal': 'SALIN',
't_ftu': 'TURB',
'temp': 'TEMP',
'ox': 'O2',
'mg_l': 'MGL'
};
const enhet_id_mapping = {
'cond': 'ms/m',
'density': 'density',
's_vel': 'm/s',
'sal': 'ppt',
't_ftu': 'FNU',
'temp': '°C',
'ox': 'mg/l',
'mg_l': 'mg/l'
};
const dataToSend = [];
if(data !== undefined){
data.forEach(item => {
const parameters = ['cond', 'density', 's_vel', 'sal', 't_ftu', 'temp', 'ox', 'mg_l'];
parameters.forEach(parameter => {
if (item[parameter] !== undefined) {
const obj = {
"Vannlok_kode": "001",
"Aktivitet_id": "A123",
"Oppdragsgiver": oppdragsgiver,
"Oppdragstaker": oppdragstaker,
"Medium_id": "Saltvann",
"Parameter_id": parameter_mapping[parameter],
"Provetakingsmetode_id": "Method1",
"Filtrert_Prove": "False",
"UnntasKlassifisering" : "",
"Analysemetode_id": "Analysis1",
"Operator": "=",
"Verdi": item[parameter].toString(),
"Enhet_id": enhet_id_mapping[parameter],
"Provenr": "",
"Tid_provetak": new Date(item.date).toISOString().split('T')[0] + ' ' + item.time,
"Ovre_dyp": item.depth.toString(),
"Nedre_dyp": item.depth.toString(),
"Dybdeenhet": "m",
"Ant_verdier": "",
"Kommentar": "",
"Deteksjonsgrense": "",
"Kvantifiseringsgrense": "",
"ID_lokal" : "",
};
dataToSend.push(obj);
}
});
});
}
dataToSend.sort((a, b) => {
if (a.Parameter_id < b.Parameter_id) {
return -1;
}
if (a.Parameter_id > b.Parameter_id) {
return 1;
}
return 0;
});
return dataToSend;
}
document.addEventListener('DOMContentLoaded', function() {
document.querySelectorAll('.downloadBtn').forEach(button => {
button.addEventListener('click', function() {
const surveyID = this.getAttribute('data-survey-id');
const instanceID = this.getAttribute('data-instance-id');
console.log('Clicked surveyID:', surveyID, 'instanceID:', instanceID);
fetch(`/surveys/survey/vannmiljo/?surveyID=${surveyID}&m_instanceID=${instanceID}`)
.then(response => {
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
return response.text();
})
.then(text => {
const data = text.split('\n').filter(line => line).map(JSON.parse);
return data;
})
.then(data => {
const dataObject = data[0];
const oppdragsgiver = dataObject.oppdragsgiver;
const oppdragstaker = dataObject.oppdragstaker;
console.log('Oppdragsgiver:', oppdragsgiver, 'Oppdragstaker:', oppdragstaker);
if (!Array.isArray(dataObject.data_list)) {
console.error('data.data_list is not an array');
return;
}
const dataToSend = createDataToSend(dataObject.data_list, oppdragsgiver, oppdragstaker); // Modify this line
if (dataObject.data_list && dataObject.data_list.length > 0) {
if (Array.isArray(dataToSend) && dataToSend.length > 0) {
fetch('http://localhost:3000/api/generate-excel', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ data: dataToSend }),
})
.then(response => response.blob())
.then(blob => {
const url = window.URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = "survey_data.xlsx";
document.body.appendChild(a);
a.click();
window.URL.revokeObjectURL(url);
})
} else {
console.log('No data to send 1');
}
} else {
console.log('No data to send 2');
}
});
});
});
});
export const generateExcel = async (req: Request, res: Response): Promise<void> => {
const errors = validationResult(req);
if (!errors.isEmpty()) {
res.status(400).json({ errors: errors.array() });
return;
}
const data: any[] = req.body.data;
if (!Array.isArray(data)) {
console.error('Data is not an array');
res.status(400).send('Invalid data format');
return;
}
const workbook = new ExcelJS.Workbook();
const worksheet = workbook.addWorksheet('Survey Data');
worksheet.columns = [
{ header: 'Vannlokasjon', key: 'Vannlok_kode', width: 20 },
{ header: 'Aktivitet_id', key: 'Aktivitet_id', width: 20 },
{ header: 'Oppdragsgiver', key: 'Oppdragsgiver', width: 30 },
{ header: 'Oppdragstaker ', key: 'Oppdragstaker', width: 30 },
{ header: 'Parameter' , key: 'Parameter_id', width: 20 },
{ header: 'Medium' , key: 'Medium_id', width: 20 },
{ header: 'Provetakingsmetode' , key: 'Provetakingsmetode_id', width: 25 },
{ header: 'Filtrert Prøve' , key: 'Filtrert_Prove', width: 20 },
{ header: 'Unntas Klassifisering' , key: 'UnntasKlassifisering', width: 20 },
{ header: 'Analysemetode' , key: 'Analysemetode_id', width: 25 },
{ header: 'Operator' , key: 'Operator', width: 20 },
{ header: 'Verdi', key: 'Verdi', width: 20 },
{ header: 'Enhet', key: 'Enhet_id', width: 20 },
{ header: 'Prøve nr' , key: 'Provenr', width: 20 },
{ header: 'Tid Prøvetaking', key: 'Tid_provetak' , width: 30 },
{ header: 'Øvre dyp', key: 'Ovre_dyp', width: 15 },
{ header: 'Nedre dyp', key: 'Nedre_dyp', width: 15 },
{ header: 'Dybdeenhet' , key: 'Dybdeenhet', width: 12 },
{ header: 'Antall verdier' , key: 'Ant_verdier', width: 20 },
{ header: 'Kommentar' , key: 'Kommentar', width: 20 },
{ header: 'Deteksjonsgrense' , key: 'Deteksjonsgrense', width: 20 },
{ header: 'Kvantifiseringsgrense' , key: 'Kvantifiseringsgrense', width: 20 },
{ header: 'Lokal ID' , key: 'ID_lokal', width: 20 },
];
const headerRow = worksheet.getRow(1);
headerRow.height = 20;
headerRow.eachCell((cell, number) => {
cell.font = { bold: true };
cell.border = {
bottom: { style: 'thin', color: { argb: '000000' } }
};
});
worksheet.addRows(data);
worksheet.eachRow((row, rowNumber) => {
row.eachCell((cell, colNumber) => {
cell.border = {
top: { style: 'thin', color: { argb: '000000' } },
left: { style: 'thin', color: { argb: '000000' } },
bottom: { style: 'thin', color: { argb: '000000' } },
right: { style: 'thin', color: { argb: '000000' } }
};
});
});
try {
res.setHeader('Content-Type', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet');
res.setHeader('Content-Disposition', 'attachment; filename=survey_data.xlsx');
await workbook.xlsx.write(res);
res.end();
} catch (error) {
console.error(error);
res.status(500).send('An error occurred while generating the Excel file.');
}
};
export const surveyValidationRules = [
body('data.*.Vannlok_kode').notEmpty().withMessage('Vannlok_kode is required and cannot be empty'),
body('data.*.Aktivitet_id').notEmpty().withMessage('Aktivitet_id is required and cannot be empty'),
body('data.*.Oppdragsgiver').notEmpty().withMessage('Oppdragsgiver is required and cannot be empty'),
body('data.*.Oppdragstaker').notEmpty().withMessage('Oppdragstaker is required and cannot be empty'),
body('data.*.Parameter_id').notEmpty().withMessage('Parameter_id is required and cannot be empty'),
body('data.*.Medium_id').notEmpty().withMessage('Medium_id is required and cannot be empty'),
body('data.*.Provetakingsmetode_id').notEmpty().withMessage('Provetakingsmetode_id is required and cannot be empty'),
body('data.*.Analysemetode_id').notEmpty().withMessage('Analysemetode_id is required and cannot be empty'),
body('data.*.Operator').notEmpty().withMessage('Operator is required and cannot be empty'),
body('data.*.Verdi').notEmpty().withMessage('Verdi is required and cannot be empty'),
body('data.*.Enhet_id').notEmpty().withMessage('Enhet_id is required and cannot be empty'),
body('data.*.Tid_provetak').notEmpty().withMessage('Tid_provetak is required and cannot be empty'),
body('data.*.Ovre_dyp').notEmpty().withMessage('Ovre_dyp is required and cannot be empty'),
body('data.*.Nedre_dyp').notEmpty().withMessage('Nedre_dyp is required and cannot be empty'),
body('data.*.Dybdeenhet').notEmpty().withMessage('Dybdeenhet is required and cannot be empty'),
]
As it seems this function's runtime is performing badly, there must be something I can improve.