diff options
| author | bulve-ad <78788030+bulve-ad@users.noreply.github.com> | 2024-08-21 15:51:23 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-08-21 15:51:23 +0100 |
| commit | 2af81a3d772bb9c275d0d48ad2284c50b0dd8f8a (patch) | |
| tree | b8e77c62b6a2d50ab04215beb54055d14210a423 /src/transform_lambda.py | |
| parent | d01d3bed939d7a17ea2205af502baeeb35510b5c (diff) | |
| parent | ce76bbb2b32b58a93d88db4abdb1bbfbf27243ea (diff) | |
| download | de-project-bentley-2af81a3d772bb9c275d0d48ad2284c50b0dd8f8a.tar.gz de-project-bentley-2af81a3d772bb9c275d0d48ad2284c50b0dd8f8a.zip | |
Merge pull request #84 from ajschofield/feature/transform_lambda
Feature/transform lambda read from s3 into df
Diffstat (limited to 'src/transform_lambda.py')
| -rw-r--r-- | src/transform_lambda.py | 37 |
1 files changed, 36 insertions, 1 deletions
```diff
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index c6a8e60..9238180 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,2 +1,37 @@
-def lambda_handler():
+import json
+import boto3
+import re
+import io
+from io import StringIO
+import pandas as pd
+
+
+def lambda_handler(event, context):
     pass
+
+
+tables = [
+    "sales_order",
+    "transaction",
+    "payment",
+    "counterparty",
+    "address",
+    "staff",
+    "purchase_order",
+    "department",
+    "currency",
+    "design",
+    "payment_type",
+]
+
+
+def read_from_s3_subfolder_to_df(tables, bucket, client=boto3.client("s3")):
+    table_dfs = {}
+    for table in tables:
+        response = client.list_objects_v2(Bucket=bucket, Prefix=table)
+        list_of_keys = [
+            "s3://" + bucket + "/" + object["Key"] for object in response["Contents"]
+        ]
+        list_of_df = [pd.read_csv(key) for key in list_of_keys]
+        table_dfs[table] = pd.concat(list_of_df)
+    return table_dfs
```
