Add a function to read files from S3 into a dict of DataFrames (one per table). The current test is failing with: 'AioClientCreator' object has no attribute '_inject_s3_input_parameters'.

author: Ang Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local> 2024-08-21 12:50:32 +0100
committer: Ang Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local> 2024-08-21 12:50:32 +0100
commit: b4fafcd9731f11f6f2efde843242b9c5cb84e85f (patch)
tree: 3219d4d91fa763334e62daad95c5d2436daf3d16 /src
parent: 8a67c688b402fae27d47399b3ae04cc8475f82b7 (diff)
download: de-project-bentley-b4fafcd9731f11f6f2efde843242b9c5cb84e85f.tar.gz
de-project-bentley-b4fafcd9731f11f6f2efde843242b9c5cb84e85f.zip
1 files changed, 30 insertions, 4 deletions
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index 900bf4b..6f65728 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,3 +1,4 @@
+#from src.extract_lambda import extract_bucket
 import json
 import boto3
 import re
@@ -10,9 +11,7 @@ import pandas as pd
 ## In order to use the pandas module in a Lambda function, a Lambda Layer needs to be attached to the AWS Lambda function.
 ## Need a function that normalises the data.
 
-
-
-s3_resource = boto3.resource('s3') ##need this for a way of reuploading data after transformation
+#s3_resource = boto3.resource('s3') ##need this for a way of reuploading data after transformation
 
 def lambda_handler(event, context):
     s3_client = boto3.client('s3')  
@@ -54,4 +53,31 @@ def lambda_handler(event, context):
 
 ## Each CSV file must be converted into a pandas DataFrame.
 ## This is done via read_csv, where StringIO creates a file-like object from a string, so the string can be treated like a file even though it is not physically stored on disk.
-## each file needs its own panda df (?) to be normalised
-\ No newline at end of file
+## each file needs its own panda df (?) to be normalised
+# Names of the tables to load (presumably passed as `tables` to
+# read_from_s3_subfolder_to_df -- confirm against the caller).
+tables = [
+    'sales_order',
+    'transaction',
+    'payment',
+    'counterparty',
+    'address',
+    'staff',
+    'purchase_order',
+    'department',
+    'currency',
+    'design',
+    'payment_type',
+]
+
+def read_from_s3_subfolder_to_df(tables, bucket, client=None):
+    """Load every CSV stored under each table's S3 prefix into one DataFrame per table.
+
+    Args:
+        tables: iterable of table names; each table's files are looked up under '<table>/'.
+        bucket: name of the S3 bucket to read from.
+        client: boto3 S3 client to use. Created on first use when omitted, so that
+            importing this module does not build a client (and tests can inject one).
+
+    Returns:
+        dict mapping each table name to the concatenation of its CSV files, or to an
+        empty DataFrame when no objects exist under that table's prefix.
+    """
+    if client is None:
+        client = boto3.client('s3')
+    paginator = client.get_paginator('list_objects_v2')
+    table_dfs = {}
+    for table in tables:
+        # The trailing '/' stops the 'payment' prefix from also matching 'payment_type/...' keys.
+        pages = paginator.paginate(Bucket=bucket, Prefix=f'{table}/')
+        # Paginating covers listings of more than 1000 keys; a page without 'Contents'
+        # means no objects. Keys ending in '/' are folder placeholders, not CSV files.
+        keys = [
+            item['Key']
+            for page in pages
+            for item in page.get('Contents', [])
+            if not item['Key'].endswith('/')
+        ]
+        # Read through the client rather than an 's3://' URL so the bucket is explicit
+        # and an injected (e.g. mocked) client is honoured.
+        frames = [
+            pd.read_csv(client.get_object(Bucket=bucket, Key=key)['Body'])
+            for key in keys
+        ]
+        # pd.concat raises on an empty list, so fall back to an empty frame.
+        table_dfs[table] = pd.concat(frames) if frames else pd.DataFrame()
+    return table_dfs
+    #   exec("%s = %d" % (table,pd.concat(list_of_df)))
+    #     exec(f"{table} = {pd.concat(list_of_df)}")
+    # table_dfs = [sales_order, transaction, payment, counterparty, address,
+    #              staff, purchase_order, department, currency, design, payment_type]
+                
+
author	Ang Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local>	2024-08-21 12:50:32 +0100
committer	Ang Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local>	2024-08-21 12:50:32 +0100
commit	b4fafcd9731f11f6f2efde843242b9c5cb84e85f (patch)
tree	3219d4d91fa763334e62daad95c5d2436daf3d16 /src
parent	8a67c688b402fae27d47399b3ae04cc8475f82b7 (diff)
download	de-project-bentley-b4fafcd9731f11f6f2efde843242b9c5cb84e85f.tar.gz de-project-bentley-b4fafcd9731f11f6f2efde843242b9c5cb84e85f.zip