diff options
| author | lian-manonog <lian.manonog@gmail.com> | 2024-08-20 21:12:11 +0100 |
|---|---|---|
| committer | lian-manonog <lian.manonog@gmail.com> | 2024-08-20 21:12:11 +0100 |
| commit | 8a67c688b402fae27d47399b3ae04cc8475f82b7 (patch) | |
| tree | cc0611729f33b45bafa8eadb1d614edccf96bbfc | |
| parent | 687eaa762bb598c61e2385dc0462d7375f86f779 (diff) | |
| download | de-project-bentley-8a67c688b402fae27d47399b3ae04cc8475f82b7.tar.gz de-project-bentley-8a67c688b402fae27d47399b3ae04cc8475f82b7.zip | |
wip: just more pseudocode
| -rw-r--r-- | src/transform_lambda.py | 38 |
1 files changed, 26 insertions, 12 deletions
diff --git a/src/transform_lambda.py b/src/transform_lambda.py index 2a97931..900bf4b 100644 --- a/src/transform_lambda.py +++ b/src/transform_lambda.py @@ -17,17 +17,17 @@ s3_resource = boto3.resource('s3') ##need this for a way of reuploading data aft def lambda_handler(event, context): s3_client = boto3.client('s3') - # tables = ['sales_order', - # 'transaction', - # 'payment', - # 'counterparty', - # 'address', - # 'staff', - # 'purchase_order', - # 'department', - # 'currency', - # 'design', - # 'payment_type'] + tables = ['sales_order', + 'transaction', + 'payment', + 'counterparty', + 'address', + 'staff', + 'purchase_order', + 'department', + 'currency', + 'design', + 'payment_type'] try: s3_bucket_name = event["Records"][0]["s3"]["bucket"]["name"] s3_file_name = event["Records"][0]["s3"]["object"]["key"] @@ -40,4 +40,18 @@ def lambda_handler(event, context): body = object['Body'] csv_string = body.read().decode('utf-8') dataframe = pd.read_csv(StringIO(csv_string)) ##this is the streaming body -
\ No newline at end of file + + print(dataframe.head(3)) + + except Exception as err: + print(err) + + # TODO implement + return { + 'statusCode': 200, + 'body': json.dumps('') + } + +## each csv file must be converted into a pandas df +## done via read_csv, where StringIO creates a file-like object from a string - it treats the string like a file, as the data is not physically stored in a file +## each file needs its own pandas df (?) to be normalised
\ No newline at end of file |
