How to avoid sigkill error 9?

1.1k views Asked by At

I am trying to build an algorithm which first builds a power set of around 100 symbols excluding null set and repeated elements. Then for each item in the list of power set it reads data file and evaluates the Sharpe Ratio (Return/Risk). Results are then appended to a list and at last the program gives the best combination of symbols that would result in highest Sharpe Ratio.

Following is the code:

import pandas as pd
import numpy as np
import math
from itertools import chain, combinations
import operator
import time as t
#ASSUMPTION
#EQUAL ALLOCATION OF RESOURCES

t0 = t.time()
start_date = '2016-06-01'
end_date = '2017-08-18'

allocation = 170000
usesymbols=['PAEL','TPL','SING','DCL','POWER','FCCL','DGKC','LUCK',
          'THCCL','PIOC','GWLC','CHCC','MLCF','FLYNG','EPCL',
          'LOTCHEM','SPL','DOL','NRSL','AGL','GGL','ICL','AKZO','ICI',
           'WAHN','BAPL','FFC','EFERT','FFBL','ENGRO','AHCL','FATIMA',
          'EFOODS','QUICE','ASC','TREET','ZIL','FFL','CLOV',
          'BGL','STCL','GGGL','TGL','GHGL','OGDC','POL','PPL','MARI',
          'SSGC','SNGP','HTL','PSO','SHEL','APL','HASCOL','RPL','MERIT',
          'GLAXO','SEARL','FEROZ','HINOON','ABOT','KEL','JPGL','EPQL',
          'HUBC','PKGP','NCPL','LPL','KAPCO','TSPL','ATRL','BYCO','NRL','PRL',
          'DWSM','SML','MZSM','IMSL','SKRS','HWQS','DSFL','TRG','PTC','TELE',
          'WTL','MDTL','AVN','NETSOL','SYS','HUMNL','PAKD',
          'ANL','CRTM','NML','NCL','GATM','CLCPS','GFIL','CHBL',
          'DFSM','KOSM','AMTEX','HIRAT','NCML','CTM','HMIM',
           'CWSM','RAVT','PIBTL','PICT','PNSC','ASL',
          'DSL','ISL','CSAP','MUGHAL','DKL','ASTL','INIL']

cost_matrix = []

def data(symbols):
    dates=pd.date_range(start_date,end_date) 
    df=pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp=pd.read_csv('/home/furqan/Desktop/python_data/{}.csv'.format(str(symbol)),usecols=['Date','Close'],
                            parse_dates=True,index_col='Date',na_values=['nan'])
        df_temp = df_temp.rename(columns={'Close': symbol})
        df=df.join(df_temp)
        df=df.fillna(method='ffill')
        df=df.fillna(method='bfill')
    return df

def mat_alloc_auto(symbols):

    n = len(symbols)
    mat_alloc = np.zeros((n,n), dtype='float')
    for i in range(0,n):
        mat_alloc[i,i] = allocation / n
    return mat_alloc

def compute_daily_returns(df):
    """Compute and return the daily return values."""
    daily_returns=(df/df.shift(1))-1
    df=df.fillna(value=0)
    daily_returns=daily_returns[1:]
    daily_returns = np.array(daily_returns)
    return daily_returns

def port_eval(matrix_alloc,daily_return_matrix):
    risk_free = 0
    amount_matrix = [allocation]
    return_mat = np.dot(daily_return_matrix,matrix_alloc)
    return_mat = np.sum(return_mat, axis=1, keepdims=True)
    return_mat = np.divide(return_mat,amount_matrix)
    mat_average = np.mean(return_mat)
    mat_std = np.std(return_mat, ddof=1)
    sharpe_ratio = ((mat_average-risk_free)/mat_std) * math.sqrt(252)
    return return_mat, sharpe_ratio, mat_average


def powerset(iterable):
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(1, len(s)+1))

power_set = list(powerset(usesymbols))
len_power = len(power_set)
sharpe = []
for j in range(0, len_power):
    df_01 = data(power_set[j])
    matrix_allocation = mat_alloc_auto(power_set[j])
    daily_return_mat = compute_daily_returns(df_01)
    return_matrix, sharpe_ratio_val, matrix_average = port_eval(matrix_allocation, daily_return_mat)
    sharpe.append(sharpe_ratio_val)


max_index, max_value = max(enumerate(sharpe), key=operator.itemgetter(1))
print('Maximum sharpe ratio occurs from ',power_set[max_index], ' value = ', max_value)

t1=t.time()
print('exec time is ', t1-t0, 'seconds')

The above code results in a sigkill error 9. After research I understood that it is because process allocates too much memory putting pressures on OS. So I tried running same code on HP Z600 workstation but it takes a lot of time plus the machine is freezes. My question is how can I make my code more efficient to get instant results.

0

There are 0 answers