diff options
| author | Ang Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local> | 2024-08-21 15:07:51 +0100 |
|---|---|---|
| committer | Ang Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local> | 2024-08-21 15:14:35 +0100 |
| commit | 0c6e2f8486d1ec4d9b0bd4984e01baca3a159df0 (patch) | |
| tree | a29c5dd0225d3ce377368886c44911fd97d89c01 /src | |
| parent | 562fac411ce0bedf3dbf067390cacef89ef47981 (diff) | |
| download | de-project-bentley-0c6e2f8486d1ec4d9b0bd4984e01baca3a159df0.tar.gz de-project-bentley-0c6e2f8486d1ec4d9b0bd4984e01baca3a159df0.zip | |
(tests) Read from s3 to df passes
Diffstat (limited to 'src')
| -rw-r--r-- | src/transform_lambda.py | 26 |
1 files changed, 4 insertions, 22 deletions
diff --git a/src/transform_lambda.py b/src/transform_lambda.py index 6f65728..ea4e16f 100644 --- a/src/transform_lambda.py +++ b/src/transform_lambda.py @@ -15,18 +15,6 @@ import pandas as pd def lambda_handler(event, context): s3_client = boto3.client('s3') - - tables = ['sales_order', - 'transaction', - 'payment', - 'counterparty', - 'address', - 'staff', - 'purchase_order', - 'department', - 'currency', - 'design', - 'payment_type'] try: s3_bucket_name = event["Records"][0]["s3"]["bucket"]["name"] s3_file_name = event["Records"][0]["s3"]["object"]["key"] @@ -51,9 +39,8 @@ def lambda_handler(event, context): 'body': json.dumps('') } -## each csv file must be converted into a pandas df -## done via read_csv, where stringIO creates an file-like-object from string - treats string like a file: as file is not physically stored in file -## each file needs its own panda df (?) to be normalised +## Started from fresh on Wed 21st Aug: + tables = ['sales_order', 'transaction', 'payment', @@ -70,14 +57,9 @@ def read_from_s3_subfolder_to_df(tables, bucket, client=boto3.client('s3')): table_dfs = {} for table in tables: response = client.list_objects_v2(Bucket=bucket, Prefix=table) - list_of_keys = ['s3://'+object['Key'] for object in response['Contents']] - print(list_of_keys) + list_of_keys = ['s3://'+bucket+'/'+object['Key'] for object in response['Contents']] list_of_df = [pd.read_csv(key) for key in list_of_keys] table_dfs[table] = pd.concat(list_of_df) return table_dfs - # exec("%s = %d" % (table,pd.concat(list_of_df))) - # exec(f"{table} = {pd.concat(list_of_df)}") - # table_dfs = [sales_order, transaction, payment, counterparty, address, - # staff, purchase_order, department, currency, design, payment_type] - + |
