python to C performance issue

147 views Asked by At

I have this python code

def transferIntListToIntArray(sourceTuple):
    """Build a ctypes ``c_int`` array holding the items of ``sourceTuple``.

    Returns a new ``c_int`` array with ``len(sourceTuple)`` elements, or
    ``None`` when ``sourceTuple`` is empty.
    """
    itemCount = len(sourceTuple)

    # Guard clause: an empty sequence yields no array at all.
    if itemCount <= 0:
        return None

    arrayType = c_int * itemCount
    return arrayType(*sourceTuple)

def transformIntTupleToIntTab(Input):
    """Convert an int, or a sequence of ints, into a ctypes ``c_int`` array.

    ``Input`` may be ``None``, a single ``int`` (treated as a one-element
    tuple) or a sequence accepted by ``transferIntListToIntArray``.

    Returns the result of ``transferIntListToIntArray`` for the normalised
    sequence, or ``None`` when ``Input`` is ``None``.
    """
    # Identity test: ``!= None`` would invoke the operand's own comparison
    # method, which container types are free to override.
    if Input is None:
        return None

    # isinstance() also accepts int subclasses; ``type(Input) == int``
    # rejected them and they then failed in len().
    if isinstance(Input, int):
        Input = (Input,)
    return transferIntListToIntArray(Input)

def myfunc(input):
    """Convert ``input`` to a C int array and pass it to ``mylib.myfuncC``.

    ``input`` is handed to ``transformIntTupleToIntTab``; the value it
    returns is the only argument given to the C function.

    Returns the result of ``mylib.myfuncC``.
    """
    # Function-scope import so the names used below are always available.
    from ctypes import POINTER, c_int

    inputTab = transformIntTupleToIntTab(input)
    # ctypes only consults the plural ``argtypes`` attribute; the earlier
    # ``argtype`` assignment had no effect.  Declaring the parameter as
    # ``int *`` accepts a c_int array of any length as well as None.
    mylib.myfuncC.argtypes = [POINTER(c_int)]
    return mylib.myfuncC(inputTab)

I have a Python file with thousands of calls to the myfunc Python function, for example myfunc((0,1,2,3)). When I measure the timing of the Python code, this line of code turns out to have a significant cost:

targetArray = (c_int * myArrayLen) (*sourceTuple)

It takes 0.1 - 0.2 sec for 10000 calls of this function (Python 2.5.1). This is just a basic example, but my real code has several conversions of Python tuples to int * or double * in C, and I would like to know how to write more efficient Python code.

example :

import time
from ctypes import *
from cmath import *

def transferIntListToIntArray(sourceTuple):
    """Return the items of ``sourceTuple`` as a new ctypes ``c_int`` array.

    An empty ``sourceTuple`` produces ``None`` instead of an array.
    """
    length = len(sourceTuple)
    if not length > 0:
        return None
    # ``c_int * length`` is the array type; calling it with the items
    # creates and fills the instance in one step.
    return (c_int * length)(*sourceTuple)


def transformIntTupleToIntTab(Input):
    """Normalise ``Input`` and convert it to a ctypes ``c_int`` array.

    ``Input`` may be ``None``, a single ``int`` (wrapped into a one-element
    tuple) or a sequence of ints.

    Returns ``None`` for ``None``; otherwise whatever
    ``transferIntListToIntArray`` returns for the sequence.
    """
    # ``is None`` avoids calling a user-defined ``__ne__`` on the argument.
    if Input is None:
        return None

    # isinstance() covers int subclasses too, which the exact type
    # comparison let through to len() where they raised TypeError.
    if isinstance(Input, int):
        Input = (Input,)
    return transferIntListToIntArray(Input)

def myfunc(iCurve):
    """Run ``transformIntTupleToIntTab`` on ``iCurve`` and return 0.

    The converted value is not used after the conversion.
    """
    convertedCurve = transformIntTupleToIntTab(iCurve)
    return 0

# Benchmark input: the tuple of ints passed to myfunc on every iteration.
test = (0,1,2,3,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,55,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5)

# time.clock() is sampled before the loop and again when printing; the
# difference is the reported elapsed time.
start = time.clock()

# Call myfunc 100000 times with the same tuple.
for i in range(100000):
   myfunc(test)
print "done, elapsed wall clock time (win32) in seconds: " , time.clock() - start

done, elapsed wall clock time (win32) in seconds: 0.497456968189

The same example takes 1.65 seconds in Python 2.7 (strange).

Python report :

    done, elapsed wall clock time (win32) in seconds:  0.582374385947
         400091 function calls in 0.590 CPU seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.590    0.590 <string>:1(<module>)
        1    0.001    0.001    0.590    0.590 {execfile}
        1    0.057    0.057    0.589    0.589 tupletest.py:1(<module>)
   100000    0.035    0.000    0.526    0.000 tupletest.py:22(myfunc)
   100000    0.068    0.000    0.491    0.000 tupletest.py:14(transformIntTupleToIntTab)
   100000    0.417    0.000    0.423    0.000 tupletest.py:5(transferIntListToIntArray)
   100003    0.006    0.000    0.006    0.000 {len}
        1    0.004    0.004    0.005    0.005 __init__.py:4(<module>)
        1    0.002    0.002    0.002    0.002 {range}
        1    0.000    0.000    0.000    0.000 _endian.py:4(<module>)
        4    0.000    0.000    0.000    0.000 __init__.py:83(CFUNCTYPE)
        2    0.000    0.000    0.000    0.000 __init__.py:211(POINTER)
        1    0.000    0.000    0.000    0.000 __init__.py:291(CDLL)
        1    0.000    0.000    0.000    0.000 __init__.py:335(PyDLL)
        1    0.000    0.000    0.000    0.000 __init__.py:441(PYFUNCTYPE)
        1    0.000    0.000    0.000    0.000 __init__.py:346(WinDLL)
        4    0.000    0.000    0.000    0.000 struct.py:43(calcsize)
        1    0.000    0.000    0.000    0.000 __init__.py:322(__getattr__)
        1    0.000    0.000    0.000    0.000 __init__.py:370(OleDLL)
        1    0.000    0.000    0.000    0.000 __init__.py:384(__getattr__)
        1    0.000    0.000    0.000    0.000 __init__.py:329(__getitem__)
        2    0.000    0.000    0.000    0.000 __init__.py:309(__init__)
        1    0.000    0.000    0.000    0.000 {_ctypes.LoadLibrary}
        3    0.000    0.000    0.000    0.000 struct.py:35(_compile)
        1    0.000    0.000    0.000    0.000 {_ctypes.set_conversion_mode}
        4    0.000    0.000    0.000    0.000 __init__.py:381(__init__)
        2    0.000    0.000    0.000    0.000 {time.clock}
       18    0.000    0.000    0.000    0.000 {_ctypes.sizeof}
        3    0.000    0.000    0.000    0.000 __init__.py:101(CFunctionType)
        1    0.000    0.000    0.000    0.000 {isinstance}
        1    0.000    0.000    0.000    0.000 __init__.py:442(CFunctionType)
        1    0.000    0.000    0.000    0.000 __init__.py:144(c_short)
        1    0.000    0.000    0.000    0.000 __init__.py:380(LibraryLoader)
        2    0.000    0.000    0.000    0.000 {setattr}
        1    0.000    0.000    0.000    0.000 _endian.py:22(_swapped_meta)
        1    0.000    0.000    0.000    0.000 __init__.py:340(_FuncPtr)
        1    0.000    0.000    0.000    0.000 __init__.py:136(py_object)
        1    0.000    0.000    0.000    0.000 {method 'startswith' of 'str' objects}
        1    0.000    0.000    0.000    0.000 _endian.py:45(BigEndianStructure)
        1    0.000    0.000    0.000    0.000 __init__.py:181(c_ulonglong)
        1    0.000    0.000    0.000    0.000 __init__.py:167(c_float)
        1    0.000    0.000    0.000    0.000 __init__.py:147(c_ushort)
        1    0.000    0.000    0.000    0.000 __init__.py:178(c_longlong)
        1    0.000    0.000    0.000    0.000 __init__.py:197(c_char)
        1    0.000    0.000    0.000    0.000 __init__.py:305(_FuncPtr)
        1    0.000    0.000    0.000    0.000 __init__.py:376(_FuncPtr)
        1    0.000    0.000    0.000    0.000 __init__.py:153(c_ulong)
        1    0.000    0.000    0.000    0.000 __init__.py:243(c_wchar)
        1    0.000    0.000    0.000    0.000 __init__.py:204(c_void_p)
        1    0.000    0.000    0.000    0.000 __init__.py:187(c_ubyte)
        1    0.000    0.000    0.000    0.000 __init__.py:201(c_char_p)
        1    0.000    0.000    0.000    0.000 __init__.py:240(c_wchar_p)
        1    0.000    0.000    0.000    0.000 __init__.py:150(c_long)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.000    0.000    0.000    0.000 __init__.py:193(c_byte)
        1    0.000    0.000    0.000    0.000 __init__.py:170(c_double)
        1    0.000    0.000    0.000    0.000 __init__.py:357(HRESULT)
        1    0.000    0.000    0.000    0.000 __init__.py:350(_FuncPtr)

Example with two arrays: the aim here is to convert iCurve1 and iCurve2 into int * for the C function, by calling mylibC.myfuncC(iCurveTab1, iCurveTab2), where the C prototype is myfuncC(int *iCurveTab1, int *iCurveTab2).

import time
from ctypes import *
from cmath import *

def transferIntListToIntArray(sourceTuple):
    """Copy ``sourceTuple`` into a newly created ctypes ``c_int`` array.

    Returns the array, or ``None`` if ``sourceTuple`` has no items.
    """
    nItems = len(sourceTuple)
    # Only a non-empty input gets an array; anything else yields None.
    return (c_int * nItems)(*sourceTuple) if nItems > 0 else None


def transformIntTupleToIntTab(Input):
    """Turn ``Input`` (None, an int, or a sequence of ints) into a C array.

    A single ``int`` is wrapped into a one-element tuple before being passed
    to ``transferIntListToIntArray``.

    Returns ``None`` when ``Input`` is ``None``, otherwise the value returned
    by ``transferIntListToIntArray``.
    """
    # Compare by identity; ``!= None`` depends on the operand's ``__ne__``.
    if Input is None:
        return None

    # isinstance() instead of an exact type comparison, so int subclasses
    # are wrapped as well rather than failing later in len().
    if isinstance(Input, int):
        Input = (Input,)
    return transferIntListToIntArray(Input)

def myfunc(iCurve1, iCurve2):
    """Convert both curves with ``transformIntTupleToIntTab`` and return 0.

    Both converted values are kept alive until the function returns but are
    not used otherwise.
    """
    firstTab = transformIntTupleToIntTab(iCurve1)
    secondTab = transformIntTupleToIntTab(iCurve2)
    return 0

# Benchmark input: the same tuple is used for both myfunc arguments.
test = (0,1,2,3,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,55,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5)

# time.clock() is sampled before the loop and again when printing; the
# difference is the reported elapsed time.
start = time.clock()

# Call myfunc 100000 times, i.e. 200000 tuple-to-array conversions.
for i in range(100000):
   myfunc(test, test)
print "done, elapsed wall clock time (win32) in seconds: " , time.clock() - start

In Python 2.5: done, elapsed wall clock time (win32) in seconds: 0.96631573455

In Python 2.7: done, elapsed wall clock time (win32) in seconds: 3.25996918937

  1. How can I improve this Python code? (The nice idea suggested earlier by deets does not work here, because iCurveTab1 and iCurveTab2 would use the same C pointer.)
  2. There seems to be a regression in Python 2.7 with this code.

My real code with the C interface. Python code (mytest.py):

 import time
    from ctypes import *
    from cmath import *

    def transferIntListToIntArray(sourceTuple):
        """Create a ctypes ``c_int`` array filled from ``sourceTuple``.

        Returns ``None`` when ``sourceTuple`` is empty.
        """
        elementCount = len(sourceTuple)
        if elementCount < 1:
            return None
        intArrayType = c_int * elementCount
        filledArray = intArrayType(*sourceTuple)
        return filledArray


    def transformIntTupleToIntTab(Input):
        """Convert ``Input`` (None, an int, or a sequence of ints) to a C array.

        A single ``int`` is wrapped into a one-element tuple first.

        Returns ``None`` when ``Input`` is ``None``, otherwise the value
        returned by ``transferIntListToIntArray``.
        """
        # Identity comparison; ``!= None`` would go through ``__ne__``.
        if Input is None:
            return None

        # isinstance() also wraps int subclasses, which the exact type
        # comparison passed on to len() where they raised TypeError.
        if isinstance(Input, int):
            Input = (Input,)
        return transferIntListToIntArray(Input)

    def myfunc(iCurve1, iCurve2):
        """Convert both curves to C int arrays and call ``mylibC.myfuncC``.

        The C function receives the two array lengths followed by the two
        arrays.  A curve that converts to ``None`` (``None`` or empty input)
        is passed with length 0.

        Returns the result of ``mylibC.myfuncC``.
        """
        iCurveTab1 = transformIntTupleToIntTab(iCurve1)
        iCurveTab2 = transformIntTupleToIntTab(iCurve2)

        # len(None) raises TypeError, so report a zero length when the
        # conversion produced no array.
        if iCurveTab1 is None:
            len1 = 0
        else:
            len1 = len(iCurveTab1)
        if iCurveTab2 is None:
            len2 = 0
        else:
            len2 = len(iCurveTab2)

        return mylibC.myfuncC(len1, len2, iCurveTab1, iCurveTab2)

C Code

/*
 * Placeholder for the C entry point called from the Python wrapper.
 *
 * ilen1, ilen2       - integers passed first (the Python caller supplies
 *                      the lengths of the two arrays)
 * piCurve1, piCurve2 - pointers to int data
 *
 * The body performs no work and returns nothing.
 */
void myfuncC(int ilen1, int ilen2, int *piCurve1, int *piCurve2)
{
  /* Parameters are deliberately unused in this stub. */
  (void)ilen1;
  (void)ilen2;
  (void)piCurve1;
  (void)piCurve2;
}

If iCurveTab1 and iCurveTab2 share the same cache, then piCurve1 == piCurve2, which is a problem because the values are overwritten.

The Python script that is executed:

# Driver script: pull in the names defined in mytest.py and call myfunc with
# two tuples of different lengths.  The ``import`` keyword was missing, which
# made the first line a syntax error.
from mytest import *
myfunc((1,2,3,4),(7,8,9,10,11))

Thanks

1

There is 1 answer

10
deets On

Not recreating the arrays seems to save quite a bit of time for me.

 import time
 from ctypes import *
 from cmath import *


 # Separate caches for the first and the second myfunc argument; each one is
 # handed to the conversion helpers, which key it by tuple length.
 iCurve1Cache, iCurve2Cache = {}, {}

 def transferIntListToIntArray(sourceTuple, array_type_cache):
     """Copy ``sourceTuple`` into a reusable ctypes ``c_int`` array.

     ``array_type_cache`` maps a length to one ``c_int`` array instance of
     that length.  The array is created on first use and overwritten in
     place on every later call with the same length, so callers sharing a
     cache receive the same array object.

     Returns the cached array, or ``None`` when ``sourceTuple`` is empty.
     """
     size = len(sourceTuple)
     if size <= 0:
         return None

     try:
         reusable = array_type_cache[size]
     except KeyError:
         # First request for this length: allocate once and remember it.
         reusable = array_type_cache[size] = (c_int * size)()

     # Slice assignment refills the existing buffer with the new values.
     reusable[:] = sourceTuple
     return reusable


 def transformIntTupleToIntTab(Input, array_type_cache=None):
     """Convert ``Input`` (None, an int, or a sequence of ints) to a C array.

     ``array_type_cache`` is the per-argument cache forwarded to
     ``transferIntListToIntArray``.  It was missing from the signature even
     though callers pass it and the helper requires it.  When omitted, a
     fresh dict is used, so a one-argument call still works (without reuse
     between calls).

     Returns ``None`` when ``Input`` is ``None``, otherwise the value
     returned by ``transferIntListToIntArray``.
     """
     # Identity comparison; ``!= None`` would go through ``__ne__``.
     if Input is None:
         return None

     if array_type_cache is None:
         array_type_cache = {}

     # isinstance() also wraps int subclasses into a one-element tuple.
     if isinstance(Input, int):
         Input = (Input,)
     return transferIntListToIntArray(Input, array_type_cache)

 def myfunc(iCurve1, iCurve2):
     """Convert both curves, each with its own cache, and return 0.

     ``iCurve1`` is converted with ``iCurve1Cache`` and ``iCurve2`` with
     ``iCurve2Cache``; the converted values are not used afterwards.
     """
     firstTab = transformIntTupleToIntTab(iCurve1, iCurve1Cache)
     secondTab = transformIntTupleToIntTab(iCurve2, iCurve2Cache)
     return 0

 test = (0,1,2,3,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,55,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5)

 start = time.clock()

 for i in range(100000):
    myfunc(test)
 print "done, elapsed wall clock time (win32) in seconds: " , time.clock() - start

On my mac, this brings down time from 1.267731 seconds to 0.36 seconds.

If you want more optimization, we need more info.