3 files changed, 64 insertions, 6 deletions
diff --git a/requirements.txt b/requirements.txt
index 6f383f9..087d1c2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 asn1crypto==1.5.1
 boto3==1.34.159
-botocore==1.34.159
+botocore==1.34.159  # boto3==1.34.159 requires botocore>=1.34.159,<1.35.0
 certifi==2024.7.4
 cffi==1.17.0
 charset-normalizer==3.3.2
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index 900bf4b..6f65728 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,3 +1,4 @@
+#from src.extract_lambda import extract_bucket
 import json
 import boto3
 import re
@@ -10,9 +11,7 @@ import pandas as pd
 ##In-order to use PANDAS module in lambda function, a Lambda Layer needs to be attached to the AWS Lambda Function.
 ##need a function that normalises the data
 
-
-
-s3_resource = boto3.resource('s3') ##need this for a way of reuploading data after transformation
+#s3_resource = boto3.resource('s3') ##need this for a way of reuploading data after transformation
 
 def lambda_handler(event, context):
     s3_client = boto3.client('s3')  
@@ -54,4 +53,31 @@ def lambda_handler(event, context):
 
 ## each csv file must be converted into a pandas df 
 ## done via read_csv, where stringIO creates an file-like-object from string - treats string like a file: as file is not physically stored in file 
-## each file needs its own panda df (?) to be normalised
-\ No newline at end of file
+## each file needs its own pandas DataFrame (?) to be normalised
+# Source table names handled by this module. Presumably passed as the
+# `tables` argument of read_from_s3_subfolder_to_df; the caller is not
+# visible here, so confirm before relying on that.
+tables = [
+    'sales_order', 'transaction',
+    'payment', 'counterparty',
+    'address', 'staff',
+    'purchase_order', 'department',
+    'currency', 'design',
+    'payment_type',
+]
+
+def read_from_s3_subfolder_to_df(tables, bucket, client=None):
+    """Load every CSV under each `<table>/` prefix of `bucket` into one DataFrame per table.
+
+    `client` is a boto3 S3 client; it is created per call when omitted, not at import time.
+    The trailing '/' on the prefix stops 'payment' from also matching 'payment_type/...'.
+    Returns {table: DataFrame}; a prefix with no objects maps to an empty DataFrame.
+    """
+    client = boto3.client('s3') if client is None else client
+    table_dfs = {}
+    for table in tables:
+        pages = client.get_paginator('list_objects_v2').paginate(Bucket=bucket, Prefix=f'{table}/')
+        keys = [item['Key'] for page in pages for item in page.get('Contents', [])]
+        frames = [pd.read_csv(client.get_object(Bucket=bucket, Key=key)['Body']) for key in keys]
+        table_dfs[table] = pd.concat(frames) if frames else pd.DataFrame()
+    return table_dfs
diff --git a/tests/test_transform_lambda.py b/tests/test_transform_lambda.py
index dd08b6a..a3ec4a8 100644
--- a/tests/test_transform_lambda.py
+++ b/tests/test_transform_lambda.py
@@ -1 +1,33 @@
-from src.transform_lambda import lambda_handler
-\ No newline at end of file
+from src.transform_lambda import read_from_s3_subfolder_to_df
+from moto import mock_aws
+import pytest
+import pandas as pd
+import os
+import boto3
+
+@pytest.fixture(scope='class')
+def aws_credentials():
+    """Export fake AWS credentials and region for the mocked boto3 clients."""
+    fake_keys = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY",
+                 "AWS_SECURITY_TOKEN", "AWS_SESSION_TOKEN")
+    os.environ.update(dict.fromkeys(fake_keys, 'testing'))
+    os.environ["AWS_DEFAULT_REGION"] = 'eu-west-2'
+
+@pytest.fixture(scope='class')
+def s3_client(aws_credentials):  # depends on aws_credentials so the fake env vars are set first
+    with mock_aws():  # the moto mock stays active until this class-scoped fixture is torn down
+        yield boto3.client('s3')
+class TestReadFromS3:
+    """Tests for read_from_s3_subfolder_to_df against a moto-mocked S3 bucket."""
+
+    def test_returns_dictionary_with_correct_value_pair(self, s3_client):
+        """One CSV uploaded under 'Foods/' should come back as {'Foods': DataFrame}."""
+        s3_client.create_bucket(
+            Bucket='dummy_buc', CreateBucketConfiguration={'LocationConstraint': 'eu-west-2'},
+        )
+        s3_client.upload_file('tests/dummy_identical.csv', 'dummy_buc', 'Foods/2024/08/21/Foods_12:03:10.csv')
+        result = read_from_s3_subfolder_to_df(['Foods'], bucket='dummy_buc', client=s3_client)
+        assert isinstance(result, dict)
+        assert list(result.keys()) == ['Foods']  # compare list to list; a str never equals a list
+        assert isinstance(result['Foods'], pd.DataFrame)
+        
+\ No newline at end of file