diff options
| -rw-r--r-- | requirements.txt | 8 | ||||
| -rw-r--r-- | src/transform_lambda.py | 37 | ||||
| -rw-r--r-- | tests/dummy_2.csv | 5 | ||||
| -rw-r--r-- | tests/test_transform_lambda.py | 74 |
4 files changed, 120 insertions, 4 deletions
diff --git a/requirements.txt b/requirements.txt
index 6f383f9..62ebbf4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 asn1crypto==1.5.1
-boto3==1.34.159
-botocore==1.34.159
+boto3
+botocore
 certifi==2024.7.4
 cffi==1.17.0
 charset-normalizer==3.3.2
@@ -27,4 +27,6 @@ scramp==1.4.5
 six==1.16.0
 urllib3==2.2.2
 Werkzeug==3.0.3
-xmltodict==0.13.0
\ No newline at end of file
+xmltodict==0.13.0
+s3fs
+pandas
\ No newline at end of file
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index c6a8e60..9238180 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,2 +1,37 @@
-def lambda_handler():
+import json
+import boto3
+import re
+import io
+from io import StringIO
+import pandas as pd
+
+
+def lambda_handler(event, context):
     pass
+
+
+tables = [
+    "sales_order",
+    "transaction",
+    "payment",
+    "counterparty",
+    "address",
+    "staff",
+    "purchase_order",
+    "department",
+    "currency",
+    "design",
+    "payment_type",
+]
+
+
+def read_from_s3_subfolder_to_df(tables, bucket, client=boto3.client("s3")):
+    table_dfs = {}
+    for table in tables:
+        response = client.list_objects_v2(Bucket=bucket, Prefix=table)
+        list_of_keys = [
+            "s3://" + bucket + "/" + object["Key"] for object in response["Contents"]
+        ]
+        list_of_df = [pd.read_csv(key) for key in list_of_keys]
+        table_dfs[table] = pd.concat(list_of_df)
+    return table_dfs
diff --git a/tests/dummy_2.csv b/tests/dummy_2.csv
new file mode 100644
index 0000000..8abc9bf
--- /dev/null
+++ b/tests/dummy_2.csv
@@ -0,0 +1,5 @@
+Car_type,Brand,Colour
+Truck,Chevrolet,Grey
+Convertible,Mercedes,Red
+Van,Volkswagen,Blue
+
diff --git a/tests/test_transform_lambda.py b/tests/test_transform_lambda.py
new file mode 100644
index 0000000..5121905
--- /dev/null
+++ b/tests/test_transform_lambda.py
@@ -0,0 +1,74 @@
+from src.transform_lambda import read_from_s3_subfolder_to_df
+from moto import mock_aws
+import pytest
+import pandas as pd
+import os
+import boto3
+import numpy as np
+
+
+@pytest.fixture(scope="class")
+def aws_credentials():
+    os.environ["AWS_ACCESS_KEY_ID"] = "testing"
+    os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
+    os.environ["AWS_SECURIT_TOKEN"] = "testing"
+    os.environ["AWS_SESSION_TOKEN"] = "testing"
+    os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
+
+
+@pytest.fixture(scope="class")
+def s3_client(aws_credentials):
+    with mock_aws():
+        yield boto3.client("s3")
+
+
+class TestReadFromS3:
+    def test_returns_dictionary_with_correct_value_pair(self, s3_client):
+        s3_client.create_bucket(
+            Bucket="dummy_buc",
+            CreateBucketConfiguration={"LocationConstraint": "eu-west-2"},
+        )
+        s3_client.upload_file(
+            "tests/dummy_identical.csv",
+            "dummy_buc",
+            "Foods/2024/08/21/Foods_12:03:10.csv",
+        )
+        tables = ["Foods"]
+        result = read_from_s3_subfolder_to_df(
+            tables, bucket="dummy_buc", client=s3_client
+        )
+        print(result)
+        expected_df = pd.DataFrame(
+            np.array([["Vegetable", "Sour", "Green"], ["Berry", "Sweet", "Red"]]),
+            columns=["Food_type", "Flavour", "Colour"],
+        )
+        assert isinstance(result, dict)
+        assert list(result.keys())[0] == "Foods"
+        assert isinstance(result["Foods"], pd.DataFrame)
+        assert result["Foods"].eq(expected_df, axis="columns").all(axis=None)
+
+    def test_returns_dictionary_of_dataframes_for_multiple_tables(self, s3_client):
+        s3_client.upload_file(
+            "tests/dummy_2.csv", "dummy_buc", "Cars/2024/08/21/Cars_14:03:56.csv"
+        )
+        tables = ["Foods", "Cars"]
+        result = read_from_s3_subfolder_to_df(
+            tables, bucket="dummy_buc", client=s3_client
+        )
+        expected_foods_df = pd.DataFrame(
+            np.array([["Vegetable", "Sour", "Green"], ["Berry", "Sweet", "Red"]]),
+            columns=["Food_type", "Flavour", "Colour"],
+        )
+        expected_cars_df = pd.DataFrame(
+            np.array(
+                [
+                    ["Truck", "Chevrolet", "Grey"],
+                    ["Convertible", "Mercedes", "Red"],
+                    ["Van", "Volkswagen", "Blue"],
+                ]
+            ),
+            columns=["Car_type", "Brand", "Colour"],
+        )
+        assert list(result.keys()) == tables
+        assert result["Foods"].eq(expected_foods_df, axis="columns").all(axis=None)
+        assert result["Cars"].eq(expected_cars_df, axis="columns").all(axis=None)
