I have an excel file with data of patients and other variables/points/dates, etc that are input into python and perform some calculations, or probability should I say. i.e. Propensity Score.
This is the code (below). It runs and displays the results and generates a new excel file with data. but I receive this error which I have no idea what it is and/or means.
import os
import pandas as pd
import numpy as np
path = r"C:\Users\Dev\Downloads\Propensity"
os.chdir(path)
print(os.getcwd())
df = pd.read_excel("BD_Cervice.xlsx")
print(df)
col_study1_anag = ['ETA', 'ISTOLOGIA', 'GRADING', 'CT NAD','TIPO CT']
col_study2_tox = ['GG INTERRUZIONE', 'TOX ACUTA GI', 'TOX ACUTA GU', 'TOX ACUTA EMATO']
col_study3_prog = ['EI_censor', 'OS censor', 'MFS censor']
new_cols = col_study1_anag+col_study2_tox+col_study3_prog
df2 = df[new_cols]
print(df2.columns)
#Cisplatino settimanale
CT1 = df2[df2["TIPO CT"]==1]
CT1.head()
nrows1 = len(CT1.index)
print("TIPO CT = 1:", nrows1)
#Cisplatino + 5 Flururacile IC
CT2 = df2[df2["TIPO CT"]==2]
CT2.head()
nrows2 = len(CT2.index)
print("TIPO CT = 2:", nrows2)
#Cisplatino + xeloda
CT4 = df2[df2["TIPO CT"]==4]
CT4.head()
nrows4 = len(CT4.index)
print("TIPO CT = 4:", nrows4)
#Pz che non eseguono CT in concomitanza
CT0 = df2[df2["TIPO CT"]==0]
CT0.head()
nrows0 = len(CT0.index)
print("TIPO CT = 0:", nrows0)
CT1_vs_CT4 = pd.concat([CT1,CT4])
CT1_vs_CT4 = CT1_vs_CT4.loc[(CT1_vs_CT4['EI_censor'] != 'ND') & (CT1_vs_CT4['OS censor'] != 'ND') & (CT1_vs_CT4['MFS censor'] != 'ND')]
print("CT1 vs CT4 nrows", len(CT1_vs_CT4.index))
# Replace Values in Column
CT1_vs_CT4['TIPO CT'] = CT1_vs_CT4['TIPO CT'].replace('2','0')
CT1_vs_CT4['TIPO CT'] = CT1_vs_CT4['TIPO CT'].replace('4','1')
print(CT1_vs_CT4.tail(10))
#isolate Y, x, confounders
Y = CT1_vs_CT4.loc[:,'OS censor'].values
X = CT1_vs_CT4.loc[:,'TIPO CT'].values
confounders = CT1_vs_CT4.drop(columns = ['TIPO CT','OS censor']).values
print("CT1 vs CT4 nrows", len(CT1_vs_CT4.index))
# determining the name of the file
file_name = 'prova.xlsx'
# saving the excel
CT1_vs_CT4.to_excel(file_name)
#propensity score matching
from causalinference import CausalModel
model = CausalModel(Y, X, confounders)
model.est_via_matching(bias_adj = True)
This is the error
Exception has occurred: ValueError
Too few control units: N_c ‹ K+1
File "C:\Users\Dev\Downloads Propensity\test1 py", line 68, in ‹module>
model = CausalModel(Y, X, confounders)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: Too few control units: N_c < K+1
Result Currently:
Can anyone help?
I appreciate this is not your every day problem
The above code I wrote to render the results I require.