diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/transform_lambda.py | 40 |
1 files changed, 32 insertions, 8 deletions
"""Transform Lambda: load a CSV object named in an S3 event into pandas.

Notes carried over from the original author's comments:

- Add a trigger from the extract bucket (on the console?).
- The trigger suffix must be ``.csv`` so that only this file type, when
  uploaded to extract, is read.
- To use the pandas module in a Lambda function, a Lambda Layer needs to
  be attached to the function.
- TODO: add a function that normalises the data.
"""
import io
import json
from io import StringIO
from urllib.parse import unquote_plus

import boto3
import pandas as pd

# Created once at import time so the client is shared by every call in this module.
s3_client = boto3.client('s3')


def lambda_handler(event, context):
    """Read the CSV object referenced by the first S3 event record.

    Args:
        event: S3 notification payload; the bucket name and object key are
            read from ``event["Records"][0]["s3"]``. Only the first record
            is processed.
        context: Lambda context object (unused).

    Returns:
        A dict with ``statusCode`` and a JSON-encoded ``body``: status 200
        with an empty body when the CSV was loaded, status 500 with the
        error text when anything failed.
    """
    try:
        s3_info = event["Records"][0]["s3"]
        s3_bucket_name = s3_info["bucket"]["name"]
        # Object keys arrive URL-encoded in S3 event notifications
        # (e.g. a space becomes "+"), so decode before calling get_object.
        s3_file_name = unquote_plus(s3_info["object"]["key"])

        response = s3_client.get_object(Bucket=s3_bucket_name, Key=s3_file_name)
        # response['Body'] is the streaming body; read it fully and decode.
        csv_string = response['Body'].read().decode('utf-8')
        dataframe = pd.read_csv(StringIO(csv_string))

        print(dataframe.head(3))
    except Exception as err:
        # Top-level boundary: report the failure instead of falling through
        # to the success response.
        print(err)
        return {
            'statusCode': 500,
            'body': json.dumps(str(err))
        }

    # TODO implement
    return {
        'statusCode': 200,
        'body': json.dumps('')
    }
