diff options
| author | T-Aji <tolujbd2@gmail.com> | 2024-08-22 12:28:32 +0100 |
|---|---|---|
| committer | T-Aji <tolujbd2@gmail.com> | 2024-08-22 12:28:32 +0100 |
| commit | fe5a5ac472e90f1994b6f974429327ba9007ced3 (patch) | |
| tree | 6451f6dcbbf81da488e73f23b8d53c854608f711 | |
| parent | 8e1893d3943eff65df6517c04b167f7bce0dd200 (diff) | |
| parent | 395731433d9e10eb748fc44669886d8aa80951e1 (diff) | |
| download | de-project-bentley-fe5a5ac472e90f1994b6f974429327ba9007ced3.tar.gz de-project-bentley-fe5a5ac472e90f1994b6f974429327ba9007ced3.zip | |
Merge branch 'feature/transform-fact-sales-order' of https://github.com/ajschofield/de-project-bentley into feature/transform-fact-sales-order
| -rw-r--r-- | src/fact-purchase-table.py | 53 |
1 files changed, 30 insertions, 23 deletions
diff --git a/src/fact-purchase-table.py b/src/fact-purchase-table.py index 53c0148..91f5077 100644 --- a/src/fact-purchase-table.py +++ b/src/fact-purchase-table.py @@ -6,29 +6,36 @@ import re import pandas as pd -dict_of_df = read_from_s3_subfolder_to_df(tables, extract_bucket(), client=boto3.client("s3")) - - -# iterates through each dataframe in the list of dataframes and assigns them to a variable -df_staff = dict_of_df['staff'] ##no change -df_currency = dict_of_df['currency'] ##scraping API -df_counterparty = dict_of_df['counterparty'] -df_address = dict_of_df['address'] -df_department = dict_of_df['department'] -df_purchase_order = dict_of_df['purchase_order'] +# iterates through each dataframe in the list of dataframes and assigns them to a variable +def get_dfs_from_dict(tables,dictionary=dict_of_df): + for table in tables: + df_staff = dict_of_df['staff'] ##no change + df_currency = dict_of_df['currency'] ##scraping API + df_counterparty = dict_of_df['counterparty'] + df_address = dict_of_df['address'] + df_department = dict_of_df['department'] + df_purchase_order = dict_of_df['purchase_order'] ## dim_staff table is the same across the schemas (no change) -## dim_counterparty table - -## dim_location df_currency --> drops 2 columns -dim_location = df_address.drop(labels=['created_at', 'last_updated'], axis=1).rename(columns={'address_id': 'location_id'}) - -## dim_counterparty -df_prefixed_address = df_address.add_prefix('counterparty_legal_', axis=1) -pd.merge(df_counterparty, - df_prefixed_address, - left_on="legal_address_id", - right_on="address_id", - how="outer") - +## dim_location from address --> drops 2 columns +def create_dim_location(dict_of_df): + dim_location = dict_of_df['address'].drop(labels=['created_at', 'last_updated'], axis=1).rename(columns={'address_id': 'location_id'}) + return dim_location + +## dim_counterparty from address and counterparty +def create_dim_counterparty(dict_of_df): + df_prefixed_address = dict_of_df['address'].add_prefix('counterparty_legal_', axis=1) + pd.merge(dict_of_df['counterparty'], + df_prefixed_address, + left_on="legal_address_id", + right_on="address_id", + how="outer") + +def create_fact_purchase_order(dict_of_df): + df_po = dict_of_df['purchase_order'] + df_po.index.name = 'purchase_record_id' + #df_po['create_date'] = df_po['create_at'].date() + #df_po['create_time'] = df_po['create_at'].time() + df_po['agreed_delivery_date'] = + df_po['agreed_payment_date']
\ No newline at end of file |
