about | summary | refs | log | tree | commit | diff | stats
path: root/src/fact-purchase-table.py
diff options
context:
space:
mode:
author T-Aji <tolujbd2@gmail.com> 2024-08-22 12:28:32 +0100
committer T-Aji <tolujbd2@gmail.com> 2024-08-22 12:28:32 +0100
commit fe5a5ac472e90f1994b6f974429327ba9007ced3 (patch)
tree 6451f6dcbbf81da488e73f23b8d53c854608f711 /src/fact-purchase-table.py
parent 8e1893d3943eff65df6517c04b167f7bce0dd200 (diff)
parent 395731433d9e10eb748fc44669886d8aa80951e1 (diff)
download de-project-bentley-fe5a5ac472e90f1994b6f974429327ba9007ced3.tar.gz
de-project-bentley-fe5a5ac472e90f1994b6f974429327ba9007ced3.zip
Merge branch 'feature/transform-fact-sales-order' of https://github.com/ajschofield/de-project-bentley into feature/transform-fact-sales-order
Diffstat (limited to 'src/fact-purchase-table.py')
-rw-r--r-- src/fact-purchase-table.py 53
1 files changed, 30 insertions, 23 deletions
diff --git a/src/fact-purchase-table.py b/src/fact-purchase-table.py
index 53c0148..91f5077 100644
--- a/src/fact-purchase-table.py
+++ b/src/fact-purchase-table.py
@@ -6,29 +6,36 @@ import re
import pandas as pd
-dict_of_df = read_from_s3_subfolder_to_df(tables, extract_bucket(), client=boto3.client("s3"))
-
-
-# iterates through each dataframe in the list of dataframes and assigns them to a variable
-df_staff = dict_of_df['staff'] ##no change
-df_currency = dict_of_df['currency'] ##scraping API
-df_counterparty = dict_of_df['counterparty']
-df_address = dict_of_df['address']
-df_department = dict_of_df['department']
-df_purchase_order = dict_of_df['purchase_order']
+# iterates through each dataframe in the list of dataframes and assigns them to a variable
+def get_dfs_from_dict(tables,dictionary=dict_of_df):
+ for table in tables:
+ df_staff = dict_of_df['staff'] ##no change
+ df_currency = dict_of_df['currency'] ##scraping API
+ df_counterparty = dict_of_df['counterparty']
+ df_address = dict_of_df['address']
+ df_department = dict_of_df['department']
+ df_purchase_order = dict_of_df['purchase_order']
## dim_staff table is the same across the schemas (no change)
-## dim_counterparty table
-
-## dim_location df_currency --> drops 2 columns
-dim_location = df_address.drop(labels=['created_at', 'last_updated'], axis=1).rename(columns={'address_id': 'location_id'})
-
-## dim_counterparty
-df_prefixed_address = df_address.add_prefix('counterparty_legal_', axis=1)
-pd.merge(df_counterparty,
- df_prefixed_address,
- left_on="legal_address_id",
- right_on="address_id",
- how="outer")
-
+## dim_location from address --> drops 2 columns
+def create_dim_location(dict_of_df):
+ dim_location = dict_of_df['address'].drop(labels=['created_at', 'last_updated'], axis=1).rename(columns={'address_id': 'location_id'})
+ return dim_location
+
+## dim_counterparty from address and counterparty
+def create_dim_counterparty(dict_of_df):
+ df_prefixed_address = dict_of_df['address'].add_prefix('counterparty_legal_', axis=1)
+ pd.merge(dict_of_df['counterparty'],
+ df_prefixed_address,
+ left_on="legal_address_id",
+ right_on="address_id",
+ how="outer")
+
+def create_fact_purchase_order(dict_of_df):
+ df_po = dict_of_df['purchase_order']
+ df_po.index.name = 'purchase_record_id'
+ #df_po['create_date'] = df_po['create_at'].date()
+ #df_po['create_time'] = df_po['create_at'].time()
+ df_po['agreed_delivery_date'] =
+ df_po['agreed_payment_date'] \ No newline at end of file
git.ajschof.me — hosted by ajschofield — powered by cgit