aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Ang Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local> 2024-08-21 15:48:41 +0100
committer Ang Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local> 2024-08-21 15:48:41 +0100
commit b882bb03882ce91c25880defb1461bfbd09dce43 (patch)
tree 7bca982cee52c368b10e47af7bd4ce0e137795a8 /src
parent 0c6e2f8486d1ec4d9b0bd4984e01baca3a159df0 (diff)
download de-project-bentley-b882bb03882ce91c25880defb1461bfbd09dce43.tar.gz
de-project-bentley-b882bb03882ce91c25880defb1461bfbd09dce43.zip
complete version of read from s3 subfolder
Diffstat (limited to 'src')
-rw-r--r-- src/transform_lambda.py 33
1 file changed, 1 insertion, 32 deletions
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index ea4e16f..3a7cf43 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -6,40 +6,9 @@ import io
from io import StringIO
import pandas as pd
-##add trigger window from extract bucket (on console?)
-##suffix: must .csv --> reads only this file type that is uploaded to extract
-##In-order to use PANDAS module in lambda function, a Lambda Layer needs to be attached to the AWS Lambda Function.
-##need a function that normalises the data
-
-#s3_resource = boto3.resource('s3') ##need this for a way of reuploading data after transformation
-
def lambda_handler(event, context):
- s3_client = boto3.client('s3')
- try:
- s3_bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
- s3_file_name = event["Records"][0]["s3"]["object"]["key"]
-
- ## concatanating the file per table - most recent
- ## iterate through the subfolders
- ## table name prefix to iterate through the files written to that table
-
- object = s3_client.get_object(Bucket=s3_bucket_name, Key=s3_file_name)
- body = object['Body']
- csv_string = body.read().decode('utf-8')
- dataframe = pd.read_csv(StringIO(csv_string)) ##this is the streaming body
-
- print(dataframe.head(3))
-
- except Exception as err:
- print(err)
-
- # TODO implement
- return {
- 'statusCode': 200,
- 'body': json.dumps('')
- }
+ pass
-## Started from fresh on Wed 21st Aug:
tables = ['sales_order',
'transaction',
git.ajschof.me — hosted by ajschofield — powered by cgit