Merge branch 'feature/transform-fact-sales-order' of https://github.com/ajschofield/de-project-bentley into feature/transform-fact-sales-order

author: T-Aji <tolujbd2@gmail.com> 2024-08-22 12:28:32 +0100
committer: T-Aji <tolujbd2@gmail.com> 2024-08-22 12:28:32 +0100
commit: fe5a5ac472e90f1994b6f974429327ba9007ced3 (patch)
tree: 6451f6dcbbf81da488e73f23b8d53c854608f711
parent: 8e1893d3943eff65df6517c04b167f7bce0dd200 (diff)
parent: 395731433d9e10eb748fc44669886d8aa80951e1 (diff)
download: de-project-bentley-fe5a5ac472e90f1994b6f974429327ba9007ced3.tar.gz
de-project-bentley-fe5a5ac472e90f1994b6f974429327ba9007ced3.zip
1 files changed, 30 insertions, 23 deletions
diff --git a/src/fact-purchase-table.py b/src/fact-purchase-table.py
index 53c0148..91f5077 100644
--- a/src/fact-purchase-table.py
+++ b/src/fact-purchase-table.py
@@ -6,29 +6,36 @@ import re
 import pandas as pd
 
 
-dict_of_df = read_from_s3_subfolder_to_df(tables, extract_bucket(), client=boto3.client("s3"))
-
-
-# iterates through each dataframe in the list of dataframes and assigns them to a variable
-df_staff = dict_of_df['staff'] ##no change
-df_currency = dict_of_df['currency'] ##scraping API 
-df_counterparty = dict_of_df['counterparty']
-df_address = dict_of_df['address']
-df_department = dict_of_df['department']
-df_purchase_order = dict_of_df['purchase_order']
+# Picks the requested tables out of the dict of dataframes and hands them back by name.
+def get_dfs_from_dict(tables, dictionary=None):
+    """Return {table_name: DataFrame} for every name in `tables`.
+
+    `dictionary` maps table name -> DataFrame. When omitted, the module-level `dict_of_df`
+    is looked up at call time (not at def time). Raises KeyError if a requested table is absent.
+    """
+    source = dict_of_df if dictionary is None else dictionary
+    return {table: source[table] for table in tables}
 
 ## dim_staff table is the same across the schemas (no change)
 
-## dim_counterparty table
-
-## dim_location df_currency --> drops 2 columns
-dim_location = df_address.drop(labels=['created_at', 'last_updated'], axis=1).rename(columns={'address_id': 'location_id'})
-
-## dim_counterparty 
-df_prefixed_address = df_address.add_prefix('counterparty_legal_', axis=1) 
-pd.merge(df_counterparty, 
-         df_prefixed_address, 
-         left_on="legal_address_id", 
-         right_on="address_id", 
-         how="outer")
-
+## dim_location: the address table minus created_at / last_updated, with address_id renamed to location_id
+def create_dim_location(dict_of_df):
+    address = dict_of_df['address']  # drop/rename below return new frames, so this input is not modified
+    return address.drop(columns=['created_at', 'last_updated']).rename(columns={'address_id': 'location_id'})
+
+## dim_counterparty: counterparty rows outer-joined to their legal address (address columns prefixed)
+def create_dim_counterparty(dict_of_df):
+    """Return counterparty merged with the 'counterparty_legal_'-prefixed address table (outer join)."""
+    # add_prefix renames every address column, address_id included, so the join key is the prefixed name
+    df_prefixed_address = dict_of_df['address'].add_prefix('counterparty_legal_', axis=1)
+    return pd.merge(dict_of_df['counterparty'], df_prefixed_address, how="outer",
+                    left_on="legal_address_id",
+                    right_on="counterparty_legal_address_id")
+
+def create_fact_purchase_order(dict_of_df):  # WIP: begins the fact purchase-order transform; unfinished and returns nothing yet
+    df_po = dict_of_df['purchase_order']  # no copy is taken, so the edits below mutate the caller's DataFrame
+    df_po.index.name = 'purchase_record_id'  # only names the existing index; the index values are unchanged
+    #df_po['create_date'] = df_po['create_at'].date()  # NOTE(review): disabled; column is presumably 'created_at' and a Series needs .dt.date - confirm
+    #df_po['create_time'] = df_po['create_at'].time()  # NOTE(review): disabled; same concerns as above (.dt.time) - confirm
+    df_po['agreed_delivery_date'] =  # NOTE(review): right-hand side is missing - SyntaxError as committed
+    df_po['agreed_payment_date']  # NOTE(review): bare expression with no effect; presumably meant to become an assignment
+\ No newline at end of file
author	T-Aji <tolujbd2@gmail.com>	2024-08-22 12:28:32 +0100
committer	T-Aji <tolujbd2@gmail.com>	2024-08-22 12:28:32 +0100
commit	fe5a5ac472e90f1994b6f974429327ba9007ced3 (patch)
tree	6451f6dcbbf81da488e73f23b8d53c854608f711
parent	8e1893d3943eff65df6517c04b167f7bce0dd200 (diff)
parent	395731433d9e10eb748fc44669886d8aa80951e1 (diff)
download	de-project-bentley-fe5a5ac472e90f1994b6f974429327ba9007ced3.tar.gz de-project-bentley-fe5a5ac472e90f1994b6f974429327ba9007ced3.zip