import cloudstorage as gcs
import logging

DEFAULT_BUFFER_SIZE = 1024 * 1024  # read in 1 MB chunks

# Retry transient GCS errors with exponential backoff.
default_retry_params = gcs.RetryParams(
    initial_delay=0.2,
    max_delay=5.0,
    backoff_factor=2,
    max_retry_period=15)
gcs.set_default_retry_params(default_retry_params)


class GCSClient(object):
    def __init__(self):
        logging.info('Created a GCS client')
        self._gcs = gcs

    @staticmethod
    def create_gcs_client():
        return GCSClient()

    def readlines(self, filename, buffer_size=DEFAULT_BUFFER_SIZE):
        # Stream the file line by line instead of loading it whole;
        # the context manager closes the file when the generator finishes.
        with self._gcs.open(filename, mode='r', read_buffer_size=buffer_size) as gcs_file:
            for line in gcs_file:
                yield line
I am running this on a managed VM on GAE. When I log into the managed VM and look at the memory usage, I observe that it keeps increasing. The app reads data using this client and parses it to compute some statistics; the consumption pattern is roughly as sketched below. The total amount of data read is about 100 files of 10 MB each, but each file is read in chunks of buffer_size rather than all at once.
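For context, a minimal sketch of the consuming loop (the /my-bucket/data/ prefix and the line-counting step are placeholders, not the real parsing code):

client = GCSClient.create_gcs_client()
total_lines = 0
# gcs.listbucket yields a GCSFileStat for each object under the prefix.
for stat in gcs.listbucket('/my-bucket/data/'):
    for line in client.readlines(stat.filename):
        total_lines += 1  # the real app parses each line and updates statistics
logging.info('Processed %d lines', total_lines)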
Is this the correct usage of the library?