aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorlian-manonog <lian.manonog@gmail.com>2024-08-19 16:49:06 +0100
committerlian-manonog <lian.manonog@gmail.com>2024-08-19 16:49:06 +0100
commit29eace351c8e35d104992119a3762ab07be1f95d (patch)
treed43c3d30b38c0f37ae2bdf1dca0f71cc1baa7815
parent3f40e96217418590ca66af6912f595cc04425849 (diff)
downloadde-project-bentley-29eace351c8e35d104992119a3762ab07be1f95d.tar.gz
de-project-bentley-29eace351c8e35d104992119a3762ab07be1f95d.zip
wip: added read_csv functionality to lambda_handler
-rw-r--r--src/transform_lambda.py40
1 files changed, 32 insertions, 8 deletions
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index 7c29df9..f62f1d4 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,11 +1,35 @@
-import boto3
-import csv
-from botocore.exceptions import ClientError
-import logging
import json
-from datetime import datetime
-import re
+import boto3
+import io
+from io import StringIO
+import pandas as pd
+
+
+## TODO: add the S3 trigger from the extract bucket (configure it on the console?)
+## trigger suffix must be .csv --> only files of this type uploaded to the extract bucket are read
+## In order to use the pandas module in the Lambda function, a Lambda Layer needs to be attached to the function.
+## TODO: need a function that normalises the data
+
+
+s3_client = boto3.client('s3')
+def lambda_handler(event, context):
+ try:
+ s3_bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
+ s3_file_name = event["Records"][0]["s3"]["object"]["key"]
+
+ object = s3_client.get_object(Bucket=s3_bucket_name, Key=s3_file_name)
+ body = object['Body']
+ csv_string = body.read().decode('utf-8')
+ dataframe = pd.read_csv(StringIO(csv_string)) ## parse the decoded CSV text (read from the streaming body above) into a DataFrame
+
+ print(dataframe.head(3))
-def lambda_handler():
- pass \ No newline at end of file
+ except Exception as err:
+ print(err)
+
+ # TODO: implement the transform step; note a failed read above is only printed and statusCode 200 is still returned
+ return {
+ 'statusCode': 200,
+ 'body': json.dumps('')
+ } \ No newline at end of file
git.ajschof.me — hosted by ajschofield — powered by cgit