diff options
| author | lian-manonog <lian.manonog@gmail.com> | 2024-08-20 19:58:15 +0100 |
|---|---|---|
| committer | lian-manonog <lian.manonog@gmail.com> | 2024-08-20 19:58:15 +0100 |
| commit | 687eaa762bb598c61e2385dc0462d7375f86f779 (patch) | |
| tree | fe8e9d7c0b13eb67d0687b56433f47cc34da0401 | |
| parent | 29eace351c8e35d104992119a3762ab07be1f95d (diff) | |
| download | de-project-bentley-687eaa762bb598c61e2385dc0462d7375f86f779.tar.gz de-project-bentley-687eaa762bb598c61e2385dc0462d7375f86f779.zip | |
wip: writing pseudocode logic for the lambda_handler
| -rw-r--r-- | src/transform_lambda.py | 36 |
1 file changed, 22 insertions, 14 deletions
diff --git a/src/transform_lambda.py b/src/transform_lambda.py index f62f1d4..2a97931 100644 --- a/src/transform_lambda.py +++ b/src/transform_lambda.py @@ -1,35 +1,43 @@ import json import boto3 +import re import io from io import StringIO import pandas as pd - ##add trigger window from extract bucket (on console?) ##suffix: must .csv --> reads only this file type that is uploaded to extract ##In-order to use PANDAS module in lambda function, a Lambda Layer needs to be attached to the AWS Lambda Function. ##need a function that normalises the data -s3_client = boto3.client('s3') + +s3_resource = boto3.resource('s3') ##need this for a way of reuploading data after transformation def lambda_handler(event, context): + s3_client = boto3.client('s3') + + # tables = ['sales_order', + # 'transaction', + # 'payment', + # 'counterparty', + # 'address', + # 'staff', + # 'purchase_order', + # 'department', + # 'currency', + # 'design', + # 'payment_type'] try: s3_bucket_name = event["Records"][0]["s3"]["bucket"]["name"] s3_file_name = event["Records"][0]["s3"]["object"]["key"] - + + ## concatanating the file per table - most recent + ## iterate through the subfolders + ## table name prefix to iterate through the files written to that table + object = s3_client.get_object(Bucket=s3_bucket_name, Key=s3_file_name) body = object['Body'] csv_string = body.read().decode('utf-8') dataframe = pd.read_csv(StringIO(csv_string)) ##this is the streaming body - - print(dataframe.head(3)) - - except Exception as err: - print(err) - - # TODO implement - return { - 'statusCode': 200, - 'body': json.dumps('') - }
\ No newline at end of file +
\ No newline at end of file
