aboutsummaryrefslogtreecommitdiffstats
path: root/src/transform_lambda.py
diff options
context:
space:
mode:
author: bulve-ad <78788030+bulve-ad@users.noreply.github.com> 2024-08-21 15:51:23 +0100
committer: GitHub <noreply@github.com> 2024-08-21 15:51:23 +0100
commit: 2af81a3d772bb9c275d0d48ad2284c50b0dd8f8a (patch)
tree: b8e77c62b6a2d50ab04215beb54055d14210a423 /src/transform_lambda.py
parent: d01d3bed939d7a17ea2205af502baeeb35510b5c (diff)
parent: ce76bbb2b32b58a93d88db4abdb1bbfbf27243ea (diff)
download: de-project-bentley-2af81a3d772bb9c275d0d48ad2284c50b0dd8f8a.tar.gz
de-project-bentley-2af81a3d772bb9c275d0d48ad2284c50b0dd8f8a.zip
Merge pull request #84 from ajschofield/feature/transform_lambda
Feature/transform lambda read from s3 into df
Diffstat (limited to 'src/transform_lambda.py')
-rw-r--r-- src/transform_lambda.py 37
1 files changed, 36 insertions, 1 deletions
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index c6a8e60..9238180 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,2 +1,37 @@
-def lambda_handler():
+import json
+import boto3
+import re
+import io
+from io import StringIO
+import pandas as pd
+
+
def lambda_handler(event, context):
    """Lambda entry point; currently a placeholder that does no work.

    Args:
        event: Invocation payload (unused).
        context: Invocation context object (unused).

    Returns:
        None.
    """
    return None
+
+
# Table names handled by this module; `read_from_s3_subfolder_to_df` accepts
# a list like this one as its `tables` argument.
tables = [
    "sales_order", "transaction", "payment", "counterparty",
    "address", "staff", "purchase_order", "department",
    "currency", "design", "payment_type",
]
+
+
def _iter_s3_keys(client, bucket, prefix):
    """Yield every object key under *prefix*, following pagination."""
    request = {"Bucket": bucket, "Prefix": prefix}
    while True:
        response = client.list_objects_v2(**request)
        # "Contents" is absent from the response when nothing matches.
        for entry in response.get("Contents", []):
            yield entry["Key"]
        if not response.get("IsTruncated"):
            return
        request["ContinuationToken"] = response["NextContinuationToken"]


def read_from_s3_subfolder_to_df(tables, bucket, client=None):
    """Read the CSV objects under each table's S3 prefix into a DataFrame.

    Args:
        tables: Iterable of table names. Each name is used as the key prefix
            ``"<table>/"`` inside the bucket.
        bucket: Name of the S3 bucket to read from.
        client: Optional boto3 S3 client used for listing objects. When
            omitted, one is created on first use, so importing this module
            does not need AWS configuration.

    Returns:
        A dict mapping each table name to one DataFrame holding the
        concatenation of all CSV objects found under its prefix. A table
        with no objects maps to an empty DataFrame.

    Note:
        The listing goes through ``client``, but the objects themselves are
        read by pandas from ``s3://`` URLs, not through ``client``.
    """
    if client is None:
        # Imported and built lazily: a default of ``boto3.client("s3")`` in
        # the signature would run at import time.
        import boto3

        client = boto3.client("s3")

    table_dfs = {}
    for table in tables:
        # The trailing slash stops "payment" from also matching keys that
        # live under "payment_type/".
        prefix = table if table.endswith("/") else f"{table}/"
        frames = [
            pd.read_csv(f"s3://{bucket}/{key}")
            for key in _iter_s3_keys(client, bucket, prefix)
            # Skip zero-byte "folder" placeholder objects.
            if not key.endswith("/")
        ]
        # pd.concat raises ValueError on an empty list.
        table_dfs[table] = pd.concat(frames) if frames else pd.DataFrame()
    return table_dfs
git.ajschof.me — hosted by ajschofield — powered by cgit