aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Alex Schofield <git@ajschof.me> 2024-08-20 11:26:26 +0100
committer Alex Schofield <git@ajschof.me> 2024-08-20 11:26:26 +0100
commit 5211751b69a894874945e3a916c33781a327ab10 (patch)
tree 550274e5d7c5ea8806ffa2858fbe545dfcdf1c5f /src
parent e25bee6c1c9db8edaf3197f0dc48fa3c63e61744 (diff)
download de-project-bentley-5211751b69a894874945e3a916c33781a327ab10.tar.gz
de-project-bentley-5211751b69a894874945e3a916c33781a327ab10.zip
feat: conditional logic for if bucket is empty
Diffstat (limited to 'src')
-rw-r--r-- src/extract_lambda.py 32
1 files changed, 23 insertions, 9 deletions
diff --git a/src/extract_lambda.py b/src/extract_lambda.py
index 4921034..6216446 100644
--- a/src/extract_lambda.py
+++ b/src/extract_lambda.py
@@ -124,6 +124,7 @@ def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client("s3
logger.error(f"Error retrieving S3 object {s3_key}: {e}")
else:
logger.error("The bucket is empty")
+ return None
except ClientError as e:
logger.error(f"Error listing S3 objects: {e}")
@@ -132,13 +133,18 @@ def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client("s3
def get_latest_timestamp(existing_files):
- all_datetimes = []
- for file_name in existing_files.keys():
- match = re.search(r"\/(.+/).+_(.+)\.csv", file_name)
- if match:
- datetime_str = "".join(match.group(1, 2))
- all_datetimes.append(datetime.strptime(datetime_str, "%Y/%m/%d/%H:%M:%S"))
- return max(all_datetimes) if all_datetimes else datetime.min
+ if existing_files:
+ all_datetimes = []
+ for file_name in existing_files.keys():
+ match = re.search(r"\/(.+/).+_(.+)\.csv", file_name)
+ if match:
+ datetime_str = "".join(match.group(1, 2))
+ all_datetimes.append(
+ datetime.strptime(datetime_str, "%Y/%m/%d/%H:%M:%S")
+ )
+ return max(all_datetimes) if all_datetimes else datetime.min
+
+ return existing_files
def process_and_upload_tables(db, existing_files, client=boto3.client("s3")):
@@ -163,8 +169,16 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")):
for table in tables:
table_name = table[0]
rows = db.run(
- f"SELECT * FROM {identifier(table_name)} WHERE last_updated >= :latest;",
- latest={datetime.strftime(latest_timestamp, "%Y-%m-%d %H:%M:%S")},
+ f"""
+ SELECT * FROM {identifier(table_name)}
+ WHERE last_updated >= :latest;
+ """,
+ latest={
+ datetime.strftime(
+ latest_timestamp if latest_timestamp else datetime(1990, 1, 1),
+ "%Y-%m-%d %H:%M:%S",
+ )
+ },
)
# Creating a temporary file path and writing the column name to it followed by each row of data
if rows:
git.ajschof.me — hosted by ajschofield — powered by cgit