aboutsummaryrefslogtreecommitdiffstats
path: root/src/load_lambda.py
diff options
context:
space:
mode:
author T-Aji <tolujbd2@gmail.com> 2024-08-28 12:48:13 +0100
committer T-Aji <tolujbd2@gmail.com> 2024-08-28 12:48:13 +0100
commit 03a5959df25f74d52ed5393c2a5af6b1b9eb34c9 (patch)
tree f460c9eba49b91366334e8fd59533ea2474e3cd6 /src/load_lambda.py
parent 65b1ca8b41488eba4db9bcb9773d857493e87b25 (diff)
download de-project-bentley-03a5959df25f74d52ed5393c2a5af6b1b9eb34c9.tar.gz
de-project-bentley-03a5959df25f74d52ed5393c2a5af6b1b9eb34c9.zip
Refactored functions to include columns instead of dropping columns
Diffstat (limited to 'src/load_lambda.py')
-rw-r--r-- src/load_lambda.py 5
1 files changed, 4 insertions, 1 deletions
diff --git a/src/load_lambda.py b/src/load_lambda.py
index 7339ab9..926b4db 100644
--- a/src/load_lambda.py
+++ b/src/load_lambda.py
@@ -134,6 +134,9 @@ def convert_parquet_files_to_dfs(bucket_name=None, client=None):
file_obj = client.get_object(Bucket=bucket_name, Key=file_key)
parquet_file = pq.ParquetFile(BytesIO(file_obj["Body"].read()))
df = parquet_file.read().to_pandas()
+ print("df", df)
+ print("type", type(df))
+ print(df.columns)
dfs[file_key] = df
except ClientError as e:
logger.error(f"Unable to retrieve S3 object {file_key}: {e}")
@@ -148,7 +151,7 @@ def convert_parquet_files_to_dfs(bucket_name=None, client=None):
except ClientError as client_error:
logger.error(f"Unable to list objects: {client_error}")
raise
-
+ print()
return dfs
git.ajschof.me — hosted by ajschofield — powered by cgit