I am trying to used Boto3 moto's mocks3 utility to test my code that connects to s3, the function basically list down all the folders with partition date and returns the latest one. I see no exception in mocking s3 buckets however the test code doesn't seem to find that buclet
My Test spec
import os
import unittest
from botocore.client import ClientError
from moto import mock_s3
from src.utils.aws_utils import *
import logging
log = logging.getLogger("my-logger")
MY_BUCKET = "mock_s3_bucket"
MY_PREFIX = "mock_folder"
@mock_s3
class TestPysparkUtils(unittest.TestCase):
def setUp(self):
s3 = boto3.resource(
"s3",
region_name="us-east-1",
aws_access_key_id="fake_access_key",
aws_secret_access_key="fake_secret_key",
)
s3.create_bucket(Bucket="{}".format(MY_BUCKET))
s3.Bucket(MY_BUCKET).put_object(Key='{}/{}/partition_date=20201223/file_20201223.txt'
.format(MY_BUCKET, MY_PREFIX), Body='def')
s3.Bucket(MY_BUCKET).put_object(Key='{}/{}/partition_date=20201222/file_20201222.txt'
.format(MY_BUCKET, MY_PREFIX), Body='abc')
def tearDown(self):
s3 = boto3.resource(
"s3",
region_name="us-east-1",
aws_access_key_id="fake_access_key",
aws_secret_access_key="fake_secret_key",
)
bucket = s3.Bucket(MY_BUCKET)
for key in bucket.objects.all():
key.delete()
bucket.delete()
def test_get_latest_file_path_inter(self):
print('{}/{}'.format(MY_BUCKET, MY_PREFIX))
s3 = boto3.resource(
"s3",
region_name="us-east-1",
aws_access_key_id="fake_access_key",
aws_secret_access_key="fake_secret_key",
)
try:
s3.meta.client.head_bucket(Bucket=MY_BUCKET)
print("Bucket Exists!")
except ClientError:
log.info('The bucket does not exist or you have no access.')
result = get_latest_file_path_inter(log, s3, 's3://{}/{}/'.format(MY_BUCKET, MY_PREFIX), 'partition_date')
print('------------------------')
print(result)
desired_result = ["foo.json", "bar.json"]
self.assertCountEqual(result, desired_result)
if __name__ == "__main__":
unittest.main()
Test Function
def get_latest_file_path_inter(logger, s3_client, base_path, partition):
"""
Returns full s3 path of latest partition assuming partition date is of format yyyyMMdd
:type (object, str, str) -> (str)
:parameter
:param logger Logger object
:param s3_client boto3 s3 client object
:param base_path Base s3 path
:param partition column name
"""
print("Inside get_latest_file_path_inter() : Given: {} {}".format(base_path, partition))
start = base_path.find("//") + 2
end = base_path.find("/", start)
bucket_in = base_path[start:end]
prefix_in = base_path[base_path.find(bucket_in) + len(bucket_in) + 1:]
print(
"bucket: {} | prefix: {} | partition: {} | path: s3://{}/{}".format(bucket_in, prefix_in, partition,
bucket_in, prefix_in))
objects = list(s3_client.Bucket(bucket_in).objects.filter(Prefix=prefix_in))
print("total objects found: {}".format(len(objects)))
dict_out = {}
if len(objects) == 0:
logger.info("Error. no files found")
return
for i in range(0, len(objects)):
file_str = objects[i].key
start = file_str.find(partition) + len(partition)
end = file_str.find("/", start)
part_found = file_str[start:end]
partial_path = file_str[:file_str.find(partition) + len(partition) + 8]
dict_out[part_found] = partial_path
dict_sort = collections.OrderedDict(sorted(dict_out.items()))
last = list(dict_sort.keys())[len(dict_sort) - 1]
path_final = "s3://{}/{}/".format(bucket_in, dict_sort.get(last))
print("path_final: {} for base_path: {} and partition: {} and last: {} and dict_sort: {}".format(
path_final, base_path, partition, last, dict_sort))
return path_final
Output
mock_s3_bucket/mock_folder
Inside get_latest_file_path_inter() : Given: s3://mock_s3_bucket/mock_folder/ partition_date
bucket: mock_s3_bucket | prefix: mock_folder/ | partition: partition_date | path: s3://mock_s3_bucket/mock_folder/
s3.Bucket(name='mock_s3_bucket')
total objects found: 0
------------------------
None
Got it working, I was mixing boto3 client and boto3 resource apis in test spec and its corresponding functions. After figuring out the difference between both, I changed everything to boto3 client api and got it working. Below is the modified function and its corresponding spec.