How to filter one dataframe column values using another dataframe column value?

87 views Asked by At

Below is my Dataframe: Df1

origin        2001-01-01 00:00:00  2002-01-01 00:00:00  2003-01-01 00:00:00  ...  2009-01-01 00:00:00  2010-01-01 00:00:00   Grand Total
Simulation 1         2.281294e+13                  NaN         1.459444e+20  ...         1.618202e+59         6.811895e+64  1.748673e+72
Simulation 2         2.175246e+13                  NaN         1.711078e+20  ...         1.388109e+59         5.608744e+64  2.106283e+72
Simulation 3         2.280800e+13                  NaN         1.422197e+20  ...         9.737150e+58         2.589176e+64  9.651312e+71
Simulation 4         2.426074e+13                  NaN         1.369407e+20  ...         1.063072e+59         2.921763e+64  1.961873e+71

2nd Dataframe : df2

 CI           Var 
0  60.0  3.009765e+72 
1  70.0  3.511393e+72 
2  80.0  4.013020e+72 
3  90.5  4.539729e+72 

below is my code:

class cl_test():
    @classmethod
    def resamledtriangle(self,n_sims):
        # Load triangle
        triangle = cl.load_sample('genins')
        # Use bootstrap sampler to get resampled triangles
        bootstrapdataframe = cl.BootstrapODPSample(n_sims = n_sims, random_state=42).fit(triangle).resampled_triangles_

        #converting to dataframe
        resampledtriangledf1 = bootstrapdataframe.to_frame()
        #print(resampledtriangledf1)
        Reserve_val = []
        iter_count=0
    
        while (iter_count < n_sims):
            bg = resampledtriangledf1[resampledtriangledf1.index==iter_count]
            resampledtriangledf = bg.pivot_table(index="origin", columns="development", values="values")
            #print(resamp)
            #final = resampledtriangledf[::-1].bfill().iloc[0]
            final2 = resampledtriangledf.ffill(axis=1).iloc[:, -1]
            resampledtriangledf = resampledtriangledf.assign(Total = final2.values)
            #resampledtriangledf = resampledtriangledf.append(pd.Series(totTriangleDf1.sum(), name='Total')).append(pd.Series(final, name='Latest Observation'))
            #calculating grand total of each row and column
            #resampledtriangledf['Grand Total'] = resampledtriangledf.sum(axis=1)
            #resampledtriangledf.loc["Grand Total"] = resampledtriangledf.sum() 
            resampledtriangledf = resampledtriangledf.append(pd.Series(resampledtriangledf.sum(), name='Grand Total'))  
            resampledtriangcdfwa1 = resampledtriangledf.shift(-1, axis=1).loc['Grand Total'] / (resampledtriangledf.loc['Grand Total'])
            resampledtriangledf = resampledtriangledf.append(pd.Series(resampledtriangcdfwa1,name='Weighted Average'))
            c = resampledtriangledf.iloc[1][::-1].replace(np.inf, np.nan).dropna().cumprod()
            resampledtriangledf = resampledtriangledf.append(c.rename('CDF'))

            s = resampledtriangledf.iloc[:, :][:-2].copy()
            ultiCalc = pd.DataFrame()
            ultiCalc['Latest']= s['Total']
            ultiCalc['CDF'] = np.flip(resampledtriangledf.loc['CDF'].values) 
            ultiCalc['Ultimate'] = ultiCalc['Latest']*ultiCalc['CDF'] 
            ultiCalc['Unpaid'] = ultiCalc['Ultimate']-ultiCalc['Latest']
            ultiCalc = ultiCalc.append(pd.Series(ultiCalc.sum(), name='Total'))
            #ultiCalc = ultiCalc.iloc[: , :-1]
            iter_count+=1
            Reserve_val.append(ultiCalc['Unpaid'])  


        simulationDf = pd.DataFrame([x for x in Reserve_val]).reset_index(drop=True)
        simulationDf.index = 'Simulation ' + (simulationDf.index + 1).astype(str)
        simulationDf = simulationDf.iloc[: , :-1]
        return simulationDf

    def varmethod(self,CI,simulationDf):
        total = simulationDf['Grand Total'].sum() 
        df = pd.DataFrame(CI,columns=['CI'])
        for value,data in enumerate(CI): #== The enumerate() method adds counter to an iterable and returns it (the enumerate object).
            CI[value]=(data/100)*(total)
        df['Var'] = CI
        print(df)
        return df

Here I have to iterate the var values from df2 and compare with grand total column of df dataframe. And create the new dataframe and store all the grand total values which is grater than Var values. For ex: First value of Var column is 3.009765e+72. I have to filter all greater values from grand total of df1 create dataframe for it. Next we have to filter for second value of Var(3.511393e+72 ) and create dataframe. So on

0

There are 0 answers