aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorAng Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local>2024-08-21 12:50:32 +0100
committerAng Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local>2024-08-21 12:50:32 +0100
commitb4fafcd9731f11f6f2efde843242b9c5cb84e85f (patch)
tree3219d4d91fa763334e62daad95c5d2436daf3d16 /src
parent8a67c688b402fae27d47399b3ae04cc8475f82b7 (diff)
downloadde-project-bentley-b4fafcd9731f11f6f2efde843242b9c5cb84e85f.tar.gz
de-project-bentley-b4fafcd9731f11f6f2efde843242b9c5cb84e85f.zip
Function to read files from S3 into a list of dataframes. The current test is failing because the 'AioClientCreator' object has no attribute "_inject_s3_input_parameters".
Diffstat (limited to 'src')
-rw-r--r--src/transform_lambda.py34
1 files changed, 30 insertions, 4 deletions
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index 900bf4b..6f65728 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,3 +1,4 @@
+#from src.extract_lambda import extract_bucket
import json
import boto3
import re
@@ -10,9 +11,7 @@ import pandas as pd
##In-order to use PANDAS module in lambda function, a Lambda Layer needs to be attached to the AWS Lambda Function.
##need a function that normalises the data
-
-
-s3_resource = boto3.resource('s3') ##need this for a way of reuploading data after transformation
+#s3_resource = boto3.resource('s3') ##need this for a way of reuploading data after transformation
def lambda_handler(event, context):
s3_client = boto3.client('s3')
@@ -54,4 +53,31 @@ def lambda_handler(event, context):
## each csv file must be converted into a pandas df
## done via read_csv, where stringIO creates an file-like-object from string - treats string like a file: as file is not physically stored in file
-## each file needs its own panda df (?) to be normalised \ No newline at end of file
+## each file needs its own panda df (?) to be normalised
+# Names of the source tables handled by this module.
+# NOTE(review): presumably passed as `tables` to read_from_s3_subfolder_to_df,
+# where each name is used as an S3 key prefix -- confirm against the caller.
+tables = [
+    'sales_order',
+    'transaction',
+    'payment',
+    'counterparty',
+    'address',
+    'staff',
+    'purchase_order',
+    'department',
+    'currency',
+    'design',
+    'payment_type',
+]
+
+def read_from_s3_subfolder_to_df(tables, bucket, client=None):
+    """Load the CSV objects under each table's S3 subfolder into DataFrames.
+
+    Args:
+        tables: iterable of table names. Each name is used as the key prefix
+            ``"<table>/"`` inside the bucket (assumes one subfolder per
+            table -- confirm against the extract step's key layout).
+        bucket: name of the S3 bucket to read from.
+        client: S3 client to use. When omitted, one is created with
+            ``boto3.client('s3')`` at call time.
+
+    Returns:
+        dict mapping each table name to a single DataFrame holding the rows
+        of every object found under that table's prefix, concatenated in
+        listing order. A table with no objects maps to an empty DataFrame.
+    """
+    # Create the client lazily: a default of boto3.client('s3') in the
+    # signature is evaluated once at import time, which needs AWS
+    # configuration on import and cannot be swapped out by tests.
+    if client is None:
+        client = boto3.client('s3')
+
+    # list_objects_v2 returns at most 1000 keys per call, so page through it.
+    paginator = client.get_paginator('list_objects_v2')
+
+    # One dict entry per table replaces the earlier idea of creating a
+    # separately named variable per table with exec().
+    table_dfs = {}
+    for table in tables:
+        # The trailing slash stops 'payment' from also matching
+        # 'payment_type/...' keys.
+        prefix = table if table.endswith('/') else table + '/'
+        frames = []
+        for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
+            # 'Contents' is absent from the response when nothing matches.
+            for entry in page.get('Contents', []):
+                key = entry['Key']
+                if key.endswith('/'):
+                    # Zero-byte "folder" placeholder, not a data file.
+                    continue
+                # Read through the supplied client rather than an 's3://'
+                # URL: the URL form omitted the bucket name and went through
+                # s3fs instead of this client.
+                s3_object = client.get_object(Bucket=bucket, Key=key)
+                frames.append(pd.read_csv(s3_object['Body']))
+        # pd.concat raises ValueError on an empty list.
+        table_dfs[table] = pd.concat(frames) if frames else pd.DataFrame()
+    return table_dfs
+
+
git.ajschof.me — hosted by ajschofield — powered by cgit