about summary refs log tree commit diff stats
path: root/src/transform_lambda
diff options
context:
space:
mode:
author Alex <git@ajschof.me> 2024-09-03 16:09:52 +0100
committer GitHub <noreply@github.com> 2024-09-03 16:09:52 +0100
commit 06e0727d554b08c4be3db954acb4d281a9146712 (patch)
tree 4152f9efe54364a5d6a6cc969befb6cea9015a5b /src/transform_lambda
parent cfe2a5fbc005d3eb766e788ea063f73c70bdca53 (diff)
parent 256e4a2d4cb56814d3a87e89895f5954d148fd5d (diff)
download de-project-bentley-06e0727d554b08c4be3db954acb4d281a9146712.tar.gz
de-project-bentley-06e0727d554b08c4be3db954acb4d281a9146712.zip
Merge pull request #117 from ajschofield/development
final pr: merge development into main branch
Diffstat (limited to 'src/transform_lambda')
-rw-r--r-- src/transform_lambda/transform_lambda.py 4
1 files changed, 3 insertions, 1 deletions
diff --git a/src/transform_lambda/transform_lambda.py b/src/transform_lambda/transform_lambda.py
index f782922..54d7d48 100644
--- a/src/transform_lambda/transform_lambda.py
+++ b/src/transform_lambda/transform_lambda.py
@@ -10,7 +10,6 @@ from botocore.exceptions import ClientError
from pg8000.native import Connection, InterfaceError
from datetime import datetime
-
class DBConnectionException(Exception):
"""Wraps pg8000.native Error or DatabaseError."""
@@ -54,6 +53,7 @@ def lambda_handler(event, context):
bucket = bucket_name("transform")
existing_s3_files = list_existing_s3_files(bucket)
+ # print(existing_s3_files)
dict_of_df = read_from_s3_subfolder_to_df(
TABLES, bucket_name("extract"), client=boto3.client("s3")
@@ -123,11 +123,13 @@ def process_to_parquet_and_upload_to_s3(
# changed parquet_file variable to the file name
client.upload_file(f"{table_name}.parquet", bucket, f"{table_name}.parquet")
status["uploaded"].append(table_name)
+ print(status)
for table_name, df in mutable_df_dict.items():
s3_key = datetime.strftime(
datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.parquet"
)
+ print(s3_key, '<<<< this is S3_Key')
parquet_file = df.to_parquet(
f"{table_name}.parquet", engine="pyarrow"
) # or fastparquet
git.ajschof.me — hosted by ajschofield — powered by cgit