about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/transform_lambda.py 36
-rw-r--r-- tests/test_transform_lambda.py 94
2 files changed, 79 insertions, 51 deletions
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index 3a7cf43..b176ccc 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,4 +1,4 @@
-#from src.extract_lambda import extract_bucket
+# from src.extract_lambda import extract_bucket
import json
import boto3
import re
@@ -6,29 +6,33 @@ import io
from io import StringIO
import pandas as pd
+
def lambda_handler(event, context):
pass
-tables = ['sales_order',
- 'transaction',
- 'payment',
- 'counterparty',
- 'address',
- 'staff',
- 'purchase_order',
- 'department',
- 'currency',
- 'design',
- 'payment_type']
+tables = [
+ "sales_order",
+ "transaction",
+ "payment",
+ "counterparty",
+ "address",
+ "staff",
+ "purchase_order",
+ "department",
+ "currency",
+ "design",
+ "payment_type",
+]
+
-def read_from_s3_subfolder_to_df(tables, bucket, client=boto3.client('s3')):
+def read_from_s3_subfolder_to_df(tables, bucket, client=boto3.client("s3")):
table_dfs = {}
for table in tables:
response = client.list_objects_v2(Bucket=bucket, Prefix=table)
- list_of_keys = ['s3://'+bucket+'/'+object['Key'] for object in response['Contents']]
+ list_of_keys = [
+ "s3://" + bucket + "/" + object["Key"] for object in response["Contents"]
+ ]
list_of_df = [pd.read_csv(key) for key in list_of_keys]
table_dfs[table] = pd.concat(list_of_df)
return table_dfs
-
-
diff --git a/tests/test_transform_lambda.py b/tests/test_transform_lambda.py
index 7de1bf3..5121905 100644
--- a/tests/test_transform_lambda.py
+++ b/tests/test_transform_lambda.py
@@ -6,45 +6,69 @@ import os
import boto3
import numpy as np
-@pytest.fixture(scope='class')
+
+@pytest.fixture(scope="class")
def aws_credentials():
- os.environ["AWS_ACCESS_KEY_ID"] = 'testing'
- os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing'
- os.environ["AWS_SECURIT_TOKEN"] = 'testing'
- os.environ["AWS_SESSION_TOKEN"] = 'testing'
- os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2'
+ os.environ["AWS_ACCESS_KEY_ID"] = "testing"
+ os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
+ os.environ["AWS_SECURIT_TOKEN"] = "testing"
+ os.environ["AWS_SESSION_TOKEN"] = "testing"
+ os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
+
-@pytest.fixture(scope='class')
+@pytest.fixture(scope="class")
def s3_client(aws_credentials):
with mock_aws():
- yield boto3.client('s3')
+ yield boto3.client("s3")
+
+
class TestReadFromS3:
-
- def test_returns_dictionary_with_correct_value_pair(self,s3_client):
- s3_client.create_bucket(Bucket = 'dummy_buc',CreateBucketConfiguration={
- 'LocationConstraint': 'eu-west-2'
- })
- s3_client.upload_file('tests/dummy_identical.csv', 'dummy_buc', 'Foods/2024/08/21/Foods_12:03:10.csv')
- tables = ['Foods']
- result = read_from_s3_subfolder_to_df(tables,bucket='dummy_buc',client=s3_client)
+ def test_returns_dictionary_with_correct_value_pair(self, s3_client):
+ s3_client.create_bucket(
+ Bucket="dummy_buc",
+ CreateBucketConfiguration={"LocationConstraint": "eu-west-2"},
+ )
+ s3_client.upload_file(
+ "tests/dummy_identical.csv",
+ "dummy_buc",
+ "Foods/2024/08/21/Foods_12:03:10.csv",
+ )
+ tables = ["Foods"]
+ result = read_from_s3_subfolder_to_df(
+ tables, bucket="dummy_buc", client=s3_client
+ )
print(result)
- expected_df = pd.DataFrame(np.array([['Vegetable', 'Sour', 'Green'], ['Berry', 'Sweet', 'Red']]),
- columns=['Food_type', 'Flavour', 'Colour'])
- assert isinstance(result,dict)
- assert list(result.keys())[0] == 'Foods'
- assert isinstance(result['Foods'],pd.DataFrame)
- assert result['Foods'].eq(expected_df,axis='columns').all(axis=None)
-
- def test_returns_dictionary_of_dataframes_for_multiple_tables(self,s3_client):
- s3_client.upload_file('tests/dummy_2.csv', 'dummy_buc', 'Cars/2024/08/21/Cars_14:03:56.csv')
- tables = ['Foods','Cars']
- result = read_from_s3_subfolder_to_df(tables,bucket='dummy_buc',client=s3_client)
- expected_foods_df = pd.DataFrame(np.array([['Vegetable', 'Sour', 'Green'], ['Berry', 'Sweet', 'Red']]),
- columns=['Food_type', 'Flavour', 'Colour'])
- expected_cars_df = pd.DataFrame(np.array([['Truck', 'Chevrolet', 'Grey'], ['Convertible', 'Mercedes','Red'],['Van','Volkswagen','Blue']]),
- columns=['Car_type', 'Brand', 'Colour'])
- assert list(result.keys()) == tables
- assert result['Foods'].eq(expected_foods_df,axis='columns').all(axis=None)
- assert result['Cars'].eq(expected_cars_df,axis='columns').all(axis=None)
-
+ expected_df = pd.DataFrame(
+ np.array([["Vegetable", "Sour", "Green"], ["Berry", "Sweet", "Red"]]),
+ columns=["Food_type", "Flavour", "Colour"],
+ )
+ assert isinstance(result, dict)
+ assert list(result.keys())[0] == "Foods"
+ assert isinstance(result["Foods"], pd.DataFrame)
+ assert result["Foods"].eq(expected_df, axis="columns").all(axis=None)
+ def test_returns_dictionary_of_dataframes_for_multiple_tables(self, s3_client):
+ s3_client.upload_file(
+ "tests/dummy_2.csv", "dummy_buc", "Cars/2024/08/21/Cars_14:03:56.csv"
+ )
+ tables = ["Foods", "Cars"]
+ result = read_from_s3_subfolder_to_df(
+ tables, bucket="dummy_buc", client=s3_client
+ )
+ expected_foods_df = pd.DataFrame(
+ np.array([["Vegetable", "Sour", "Green"], ["Berry", "Sweet", "Red"]]),
+ columns=["Food_type", "Flavour", "Colour"],
+ )
+ expected_cars_df = pd.DataFrame(
+ np.array(
+ [
+ ["Truck", "Chevrolet", "Grey"],
+ ["Convertible", "Mercedes", "Red"],
+ ["Van", "Volkswagen", "Blue"],
+ ]
+ ),
+ columns=["Car_type", "Brand", "Colour"],
+ )
+ assert list(result.keys()) == tables
+ assert result["Foods"].eq(expected_foods_df, axis="columns").all(axis=None)
+ assert result["Cars"].eq(expected_cars_df, axis="columns").all(axis=None)
git.ajschof.me — hosted by ajschofield — powered by cgit