diff options
| author | lian-manonog <lian.manonog@gmail.com> | 2024-08-21 17:11:57 +0100 |
|---|---|---|
| committer | lian-manonog <lian.manonog@gmail.com> | 2024-08-21 17:11:57 +0100 |
| commit | 5b2b4864eae129e112e70d093eb66498d7de401e (patch) | |
| tree | 1f477f0b6ff1f2f01cab682fd74859d0345d4411 /src | |
| parent | 0c02bd3636ed8815aadf73685c20f8c76a073c99 (diff) | |
| download | de-project-bentley-5b2b4864eae129e112e70d093eb66498d7de401e.tar.gz de-project-bentley-5b2b4864eae129e112e70d093eb66498d7de401e.zip | |
wip: fact_purchase_order schema
Diffstat (limited to 'src')
| -rw-r--r-- | src/fact-purchase-table.py | 34 | ||||
| -rw-r--r-- | src/fact-sales-order.py | 2 | ||||
| -rw-r--r-- | src/transform_lambda.py | 4 |
3 files changed, 37 insertions, 3 deletions
diff --git a/src/fact-purchase-table.py b/src/fact-purchase-table.py
new file mode 100644
index 0000000..53c0148
--- /dev/null
+++ b/src/fact-purchase-table.py
@@ -0,0 +1,34 @@
+from src.transform_lambda import read_from_s3_subfolder_to_df, tables
+from src.extract_lambda import extract_bucket
+import json
+import boto3
+import re
+import pandas as pd
+
+
+dict_of_df = read_from_s3_subfolder_to_df(tables, extract_bucket(), client=boto3.client("s3"))
+
+
+# iterates through each dataframe in the list of dataframes and assigns them to a variable
+df_staff = dict_of_df['staff'] ##no change
+df_currency = dict_of_df['currency'] ##scraping API
+df_counterparty = dict_of_df['counterparty']
+df_address = dict_of_df['address']
+df_department = dict_of_df['department']
+df_purchase_order = dict_of_df['purchase_order']
+
+## dim_staff table is the same across the schemas (no change)
+
+## dim_counterparty table
+
+## dim_location df_currency --> drops 2 columns
+dim_location = df_address.drop(labels=['created_at', 'last_updated'], axis=1).rename(columns={'address_id': 'location_id'})
+
+## dim_counterparty
+df_prefixed_address = df_address.add_prefix('counterparty_legal_', axis=1)
+pd.merge(df_counterparty,
+ df_prefixed_address,
+ left_on="legal_address_id",
+ right_on="address_id",
+ how="outer")
+
diff --git a/src/fact-sales-order.py b/src/fact-sales-order.py
index 399e435..57e2e84 100644
--- a/src/fact-sales-order.py
+++ b/src/fact-sales-order.py
@@ -69,7 +69,7 @@ counterparty_address = pd.merge(
 df_address,
 left_on="legal_address_id",
 right_on="address_id",
- how="outer",
+ how="outer"
 )
 counterparty_address.rename(
 columns={
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index 9238180..920a24f 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,8 +1,6 @@
 import json
 import boto3
 import re
-import io
-from io import StringIO
 import pandas as pd
 
 
@@ -35,3 +33,5 @@ def read_from_s3_subfolder_to_df(tables, bucket, client=boto3.client("s3")):
 list_of_df = [pd.read_csv(key) for key in list_of_keys]
 table_dfs[table] = pd.concat(list_of_df)
 return table_dfs
+
+
|
