diff options
| author | T-Aji <tolujbd2@gmail.com> | 2024-08-28 12:48:13 +0100 |
|---|---|---|
| committer | T-Aji <tolujbd2@gmail.com> | 2024-08-28 12:48:13 +0100 |
| commit | 03a5959df25f74d52ed5393c2a5af6b1b9eb34c9 (patch) | |
| tree | f460c9eba49b91366334e8fd59533ea2474e3cd6 /src/load_lambda.py | |
| parent | 65b1ca8b41488eba4db9bcb9773d857493e87b25 (diff) | |
| download | de-project-bentley-03a5959df25f74d52ed5393c2a5af6b1b9eb34c9.tar.gz de-project-bentley-03a5959df25f74d52ed5393c2a5af6b1b9eb34c9.zip | |
Refactored functions to include columns instead of dropping columns
Diffstat (limited to 'src/load_lambda.py')
| -rw-r--r-- | src/load_lambda.py | 5 |
1 files changed, 4 insertions, 1 deletions
diff --git a/src/load_lambda.py b/src/load_lambda.py index 7339ab9..926b4db 100644 --- a/src/load_lambda.py +++ b/src/load_lambda.py @@ -134,6 +134,9 @@ def convert_parquet_files_to_dfs(bucket_name=None, client=None): file_obj = client.get_object(Bucket=bucket_name, Key=file_key) parquet_file = pq.ParquetFile(BytesIO(file_obj["Body"].read())) df = parquet_file.read().to_pandas() + print("df", df) + print("type", type(df)) + print(df.columns) dfs[file_key] = df except ClientError as e: logger.error(f"Unable to retrieve S3 object {file_key}: {e}") @@ -148,7 +151,7 @@ def convert_parquet_files_to_dfs(bucket_name=None, client=None): except ClientError as client_error: logger.error(f"Unable to list objects: {client_error}") raise - + print() return dfs |
