diff options
| author | Alex Schofield <git@ajschof.me> | 2024-08-19 23:44:52 +0100 |
|---|---|---|
| committer | Alex Schofield <git@ajschof.me> | 2024-08-19 23:44:52 +0100 |
| commit | 3e80acb28eeeb0eaff97c2363124a8c6e95bcb13 (patch) | |
| tree | 34b2c667babe7295ff799b9a146fa3c2102599e6 /src | |
| parent | 5be3b130170c82360ff9715f5c09b9e815fc16f4 (diff) | |
| download | de-project-bentley-3e80acb28eeeb0eaff97c2363124a8c6e95bcb13.tar.gz de-project-bentley-3e80acb28eeeb0eaff97c2363124a8c6e95bcb13.zip | |
refactor: optimise s3 streaming & file naming
Diffstat (limited to 'src')
| -rw-r--r-- | src/extract_lambda.py | 17 |
1 files changed, 6 insertions, 11 deletions
diff --git a/src/extract_lambda.py b/src/extract_lambda.py index f38e24a..8575b08 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -149,15 +149,9 @@ def stream_to_s3(table_name, rows, column_names, s3_client, bucket_name, s3_key) for row in rows: csv_writer.writerow(row) - if csv_buffer.tell() > 5 * 1024 * 1024: - csv_buffer.seek(0) - s3_client.upload_fileobj(csv_buffer, bucket_name, s3_key) - csv_buffer.truncate(0) - csv_buffer.seek(0) + csv_buffer.seek(0) - if csv_buffer.tell() > 0: - csv_buffer.seek(0) - s3_client.upload_fileobj(csv_buffer, bucket_name, s3_key) + s3_client.upload_fileobj(csv_buffer, bucket_name, s3_key) def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): @@ -190,13 +184,14 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): col_name[0] for col_name in db.run( """SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS - WHERE table_name = :table ;""", + WHERE table_name = :table ;""", table=table_name, ) ] - s3_key = datetime.strftime( - datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv" + s3_key = ( + f"{table_name}/{datetime.now().strftime('%Y/%m/%d')}/" + f"{table_name}_{datetime.now().strftime('%H:%M:%S')}.csv" ) try: |
