diff options
| author | Alex <git@ajschof.me> | 2024-09-03 16:08:22 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-09-03 16:08:22 +0100 |
| commit | 256e4a2d4cb56814d3a87e89895f5954d148fd5d (patch) | |
| tree | 4152f9efe54364a5d6a6cc969befb6cea9015a5b /src/transform_lambda | |
| parent | 3b8e89968e3d3d3527ea76b4517b0d7278512530 (diff) | |
| parent | ce30178558cc8222e9975273eb5d08a93ae92fcc (diff) | |
| download | de-project-bentley-256e4a2d4cb56814d3a87e89895f5954d148fd5d.tar.gz de-project-bentley-256e4a2d4cb56814d3a87e89895f5954d148fd5d.zip | |
Merge pull request #116 from ajschofield/test/tests_transform_lambda (development)
pr: merge test/tests_transform_lambda into development
Diffstat (limited to 'src/transform_lambda')
| -rw-r--r-- | src/transform_lambda/transform_lambda.py | 4 |
1 files changed, 3 insertions, 1 deletions
```diff
diff --git a/src/transform_lambda/transform_lambda.py b/src/transform_lambda/transform_lambda.py
index f782922..54d7d48 100644
--- a/src/transform_lambda/transform_lambda.py
+++ b/src/transform_lambda/transform_lambda.py
@@ -10,7 +10,6 @@ from botocore.exceptions import ClientError
 from pg8000.native import Connection, InterfaceError
 from datetime import datetime
 
-
 class DBConnectionException(Exception):
     """Wraps pg8000.native Error or DatabaseError."""
 
@@ -54,6 +53,7 @@ def lambda_handler(event, context):
     bucket = bucket_name("transform")
 
     existing_s3_files = list_existing_s3_files(bucket)
+    # print(existing_s3_files)
 
     dict_of_df = read_from_s3_subfolder_to_df(
         TABLES, bucket_name("extract"), client=boto3.client("s3")
@@ -123,11 +123,13 @@ def process_to_parquet_and_upload_to_s3(
             # changed parquet_file variable to the file name
             client.upload_file(f"{table_name}.parquet", bucket, f"{table_name}.parquet")
             status["uploaded"].append(table_name)
+            print(status)
 
     for table_name, df in mutable_df_dict.items():
         s3_key = datetime.strftime(
             datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.parquet"
         )
+        print(s3_key, '<<<< this is S3_Key')
         parquet_file = df.to_parquet(
             f"{table_name}.parquet", engine="pyarrow"
         )  # or fastparquet
```
