diff options
| author | Ellie <ecsymonds@gmail.com> | 2024-08-23 09:47:52 +0100 |
|---|---|---|
| committer | Ellie <ecsymonds@gmail.com> | 2024-08-23 09:47:52 +0100 |
| commit | 265d61c34c3a56b7e74333911e65d3148b2945b4 (patch) | |
| tree | 539d5bb30f2a468d242df7e323a8b59995fa558e /src/load_lambda.py | |
| parent | 6bf831c5387408e92a63cb5667aab8f415b536e4 (diff) | |
| download | de-project-bentley-265d61c34c3a56b7e74333911e65d3148b2945b4.tar.gz de-project-bentley-265d61c34c3a56b7e74333911e65d3148b2945b4.zip | |
add get transform bucket function
Diffstat (limited to 'src/load_lambda.py')
| -rw-r--r-- | src/load_lambda.py | 16 |
1 files changed, 15 insertions, 1 deletions
```diff
diff --git a/src/load_lambda.py b/src/load_lambda.py
index 1813db4..a3fd996 100644
--- a/src/load_lambda.py
+++ b/src/load_lambda.py
@@ -17,6 +17,20 @@ logging.basicConfig(
 logging.getLogger("botocore").setLevel(logging.WARNING)
 
 
+# get transform bucket
+def transform_bucket(client=None):
+    if client is None:
+        client = boto3.client("s3")
+    response = client.list_buckets()
+    transform_bucket_filter = [
+        bucket["Name"] for bucket in response["Buckets"] if "transform" in bucket["Name"]
+    ]
+
+    if not transform_bucket_filter:
+        raise ValueError("No transform_bucket found")
+
+    return transform_bucket_filter[0]
+
 # list and then retrieve parquet files from S3 bucket
 # convert parquet files into dataframes and return a list of dataframes
 def convert_parquet_files_to_dfs(bucket_name=None, client=None):
@@ -24,7 +38,7 @@ def convert_parquet_files_to_dfs(bucket_name=None, client=None):
     if client is None:
         client = boto3.client("s3")
     if bucket_name is None:
-        bucket_name = "transform_bucket"
+        bucket_name = transform_bucket(client)
     files = client.list_objects_v2(Bucket=bucket_name)
     dfs = []
 
```
