wip: writing pseudocode logic for the lambda_handler

author: lian-manonog <lian.manonog@gmail.com> 2024-08-20 19:58:15 +0100
committer: lian-manonog <lian.manonog@gmail.com> 2024-08-20 19:58:15 +0100
commit: 687eaa762bb598c61e2385dc0462d7375f86f779 (patch)
tree: fe8e9d7c0b13eb67d0687b56433f47cc34da0401
parent: 29eace351c8e35d104992119a3762ab07be1f95d (diff)
download: de-project-bentley-687eaa762bb598c61e2385dc0462d7375f86f779.tar.gz
de-project-bentley-687eaa762bb598c61e2385dc0462d7375f86f779.zip
1 files changed, 22 insertions, 14 deletions
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index f62f1d4..2a97931 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,35 +1,43 @@
 import json
 import boto3
+import re
 import io
 from io import StringIO
 import pandas as pd
 
-
 ##add trigger window from extract bucket (on console?)
 ##suffix: must .csv --> reads only this file type that is uploaded to extract
 ##In-order to use PANDAS module in lambda function, a Lambda Layer needs to be attached to the AWS Lambda Function.
 ##need a function that normalises the data
 
 
-s3_client = boto3.client('s3')
+
+s3_resource = boto3.resource('s3') ##need this for a way of reuploading data after transformation
 
 def lambda_handler(event, context):
+    s3_client = boto3.client('s3')  
+
+    # tables = ['sales_order', 
+    #           'transaction', 
+    #           'payment', 
+    #           'counterparty', 
+    #           'address', 
+    #           'staff', 
+    #           'purchase_order', 
+    #           'department', 
+    #           'currency', 
+    #           'design', 
+    #           'payment_type']
     try:
         s3_bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
         s3_file_name = event["Records"][0]["s3"]["object"]["key"]
-        
+
+        ## concatenating the files per table - most recent
+        ## iterate through the subfolders
+        ## use the table name as a prefix to iterate through the files written to that table
+
         object = s3_client.get_object(Bucket=s3_bucket_name, Key=s3_file_name)
         body = object['Body']
         csv_string = body.read().decode('utf-8')
         dataframe = pd.read_csv(StringIO(csv_string)) ##this is the streaming body
-        
-        print(dataframe.head(3))
-
-    except Exception as err:
-        print(err)
-        
-    # TODO implement
-    return {
-        'statusCode': 200,
-        'body': json.dumps('')
-    }
-\ No newline at end of file
+        
+\ No newline at end of file
author	lian-manonog <lian.manonog@gmail.com>	2024-08-20 19:58:15 +0100
committer	lian-manonog <lian.manonog@gmail.com>	2024-08-20 19:58:15 +0100
commit	687eaa762bb598c61e2385dc0462d7375f86f779 (patch)
tree	fe8e9d7c0b13eb67d0687b56433f47cc34da0401
parent	29eace351c8e35d104992119a3762ab07be1f95d (diff)
download	de-project-bentley-687eaa762bb598c61e2385dc0462d7375f86f779.tar.gz de-project-bentley-687eaa762bb598c61e2385dc0462d7375f86f779.zip