aboutsummaryrefslogtreecommitdiffstats
path: root/src/extract_lambda.py
diff options
context:
space:
mode:
authorAlex Schofield <git@ajschof.me>2024-08-19 23:44:52 +0100
committerAlex Schofield <git@ajschof.me>2024-08-19 23:44:52 +0100
commit3e80acb28eeeb0eaff97c2363124a8c6e95bcb13 (patch)
tree34b2c667babe7295ff799b9a146fa3c2102599e6 /src/extract_lambda.py
parent5be3b130170c82360ff9715f5c09b9e815fc16f4 (diff)
downloadde-project-bentley-3e80acb28eeeb0eaff97c2363124a8c6e95bcb13.tar.gz
de-project-bentley-3e80acb28eeeb0eaff97c2363124a8c6e95bcb13.zip
refactor: optimise s3 streaming & file naming
Diffstat (limited to 'src/extract_lambda.py')
-rw-r--r--src/extract_lambda.py17
1 files changed, 6 insertions, 11 deletions
diff --git a/src/extract_lambda.py b/src/extract_lambda.py
index f38e24a..8575b08 100644
--- a/src/extract_lambda.py
+++ b/src/extract_lambda.py
@@ -149,15 +149,9 @@ def stream_to_s3(table_name, rows, column_names, s3_client, bucket_name, s3_key)
for row in rows:
csv_writer.writerow(row)
- if csv_buffer.tell() > 5 * 1024 * 1024:
- csv_buffer.seek(0)
- s3_client.upload_fileobj(csv_buffer, bucket_name, s3_key)
- csv_buffer.truncate(0)
- csv_buffer.seek(0)
+ csv_buffer.seek(0)
- if csv_buffer.tell() > 0:
- csv_buffer.seek(0)
- s3_client.upload_fileobj(csv_buffer, bucket_name, s3_key)
+ s3_client.upload_fileobj(csv_buffer, bucket_name, s3_key)
def process_and_upload_tables(db, existing_files, client=boto3.client("s3")):
@@ -190,13 +184,14 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")):
col_name[0]
for col_name in db.run(
"""SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS
- WHERE table_name = :table ;""",
+ WHERE table_name = :table ;""",
table=table_name,
)
]
- s3_key = datetime.strftime(
- datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv"
+ s3_key = (
+ f"{table_name}/{datetime.now().strftime('%Y/%m/%d')}/"
+ f"{table_name}_{datetime.now().strftime('%H:%M:%S')}.csv"
)
try:
git.ajschof.me — hosted by ajschofield — powered by cgit