From dd68d948dec97fedfcaa89806523975ad1224c71 Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Fri, 16 Aug 2024 13:48:22 +0100 Subject: refactoring for extract lambda to filter by last updated and if not empty write it s3 --- .gitignore | 2 ++ src/extract_lambda.py | 26 +++++++++++--------------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index ca15434..bceab93 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ __pycache__/ # OS-Related Files .DS_Store + +*venv* diff --git a/src/extract_lambda.py b/src/extract_lambda.py index f4c0c1d..e348bef 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -136,9 +136,9 @@ def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): print(tables) for table in tables: table_name = table[0] - rows = db.run(f"SELECT * FROM {table_name};") - + rows = db.run(f"SELECT * FROM {table_name} WHERE last_updated >= {datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')};") + if rows: csv_file_path = f"/tmp/{table_name}.csv" with open(csv_file_path, "w", newline='') as file: writer = csv.writer(file) @@ -147,16 +147,12 @@ def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): writer.writerow(column_names) writer.writerows(rows) s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') - new_csv_content = open(csv_file_path, "r").read() - ## NEW CODE - latest_s3_object_key = datetime.strftime(latest_timestamp,f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') - ## END OF NEW CODE - if existing_files[latest_s3_object_key] != new_csv_content: - try: - client.upload_file(csv_file_path, extract_bucket(), s3_key) - logger.info(f"Uploaded {s3_key} to S3.") - except ClientError as e: - logger.error(f'Error uploading to S3: {e}') - else: - logger.info(f"No new data.") - \ No newline at end of file + + try: + client.upload_file(csv_file_path, extract_bucket(), s3_key) + logger.info(f"Uploaded {s3_key} to S3.") + except ClientError as e: + logger.error(f'Error uploading to S3: {e}') + else: + logger.info(f"No new data.") + \ No newline at end of file -- cgit v1.2.3