From b882bb03882ce91c25880defb1461bfbd09dce43 Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Wed, 21 Aug 2024 15:48:41 +0100 Subject: complete version of read from s3 subfolder --- src/transform_lambda.py | 33 +-------------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) (limited to 'src') diff --git a/src/transform_lambda.py b/src/transform_lambda.py index ea4e16f..3a7cf43 100644 --- a/src/transform_lambda.py +++ b/src/transform_lambda.py @@ -6,40 +6,9 @@ import io from io import StringIO import pandas as pd -##add trigger window from extract bucket (on console?) -##suffix: must .csv --> reads only this file type that is uploaded to extract -##In-order to use PANDAS module in lambda function, a Lambda Layer needs to be attached to the AWS Lambda Function. -##need a function that normalises the data - -#s3_resource = boto3.resource('s3') ##need this for a way of reuploading data after transformation - def lambda_handler(event, context): - s3_client = boto3.client('s3') - try: - s3_bucket_name = event["Records"][0]["s3"]["bucket"]["name"] - s3_file_name = event["Records"][0]["s3"]["object"]["key"] - - ## concatanating the file per table - most recent - ## iterate through the subfolders - ## table name prefix to iterate through the files written to that table - - object = s3_client.get_object(Bucket=s3_bucket_name, Key=s3_file_name) - body = object['Body'] - csv_string = body.read().decode('utf-8') - dataframe = pd.read_csv(StringIO(csv_string)) ##this is the streaming body - - print(dataframe.head(3)) - - except Exception as err: - print(err) - - # TODO implement - return { - 'statusCode': 200, - 'body': json.dumps('') - } + pass -## Started from fresh on Wed 21st Aug: tables = ['sales_order', 'transaction', -- cgit v1.2.3