aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorlian-manonog <lian.manonog@gmail.com>2024-08-21 17:11:57 +0100
committerlian-manonog <lian.manonog@gmail.com>2024-08-21 17:11:57 +0100
commit5b2b4864eae129e112e70d093eb66498d7de401e (patch)
tree1f477f0b6ff1f2f01cab682fd74859d0345d4411 /src
parent0c02bd3636ed8815aadf73685c20f8c76a073c99 (diff)
downloadde-project-bentley-5b2b4864eae129e112e70d093eb66498d7de401e.tar.gz
de-project-bentley-5b2b4864eae129e112e70d093eb66498d7de401e.zip
wip: fact_purchase_order schema
Diffstat (limited to 'src')
-rw-r--r--src/fact-purchase-table.py34
-rw-r--r--src/fact-sales-order.py2
-rw-r--r--src/transform_lambda.py4
3 files changed, 37 insertions, 3 deletions
diff --git a/src/fact-purchase-table.py b/src/fact-purchase-table.py
new file mode 100644
index 0000000..53c0148
--- /dev/null
+++ b/src/fact-purchase-table.py
@@ -0,0 +1,34 @@
+from src.transform_lambda import read_from_s3_subfolder_to_df, tables
+from src.extract_lambda import extract_bucket
+import json
+import boto3
+import re
+import pandas as pd
+
+
+dict_of_df = read_from_s3_subfolder_to_df(tables, extract_bucket(), client=boto3.client("s3"))
+
+
+# pulls each source table's dataframe out of the dict returned above, by table name
+df_staff = dict_of_df['staff'] ##no change
+df_currency = dict_of_df['currency'] ##scraping API
+df_counterparty = dict_of_df['counterparty']
+df_address = dict_of_df['address']
+df_department = dict_of_df['department']
+df_purchase_order = dict_of_df['purchase_order']
+
+## dim_staff table is the same across the schemas (no change)
+
+## dim_location: df_address minus created_at/last_updated, address_id renamed to location_id
+dim_location = df_address.drop(labels=['created_at', 'last_updated'], axis=1).rename(columns={'address_id': 'location_id'})
+
+## dim_counterparty: counterparty rows outer-joined to their legal address.
+## add_prefix renames every address column (address_id included), so the
+## right-hand join key must be the prefixed name, not the bare 'address_id'.
+df_prefixed_address = df_address.add_prefix('counterparty_legal_')
+dim_counterparty = pd.merge(df_counterparty,
+                            df_prefixed_address,
+                            left_on="legal_address_id",
+                            right_on="counterparty_legal_address_id",
+                            how="outer")
+
diff --git a/src/fact-sales-order.py b/src/fact-sales-order.py
index 399e435..57e2e84 100644
--- a/src/fact-sales-order.py
+++ b/src/fact-sales-order.py
@@ -69,7 +69,7 @@ counterparty_address = pd.merge(
df_address,
left_on="legal_address_id",
right_on="address_id",
- how="outer",
+ how="outer"
)
counterparty_address.rename(
columns={
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index 9238180..920a24f 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,8 +1,6 @@
import json
import boto3
import re
-import io
-from io import StringIO
import pandas as pd
@@ -35,3 +33,5 @@ def read_from_s3_subfolder_to_df(tables, bucket, client=boto3.client("s3")):
list_of_df = [pd.read_csv(key) for key in list_of_keys]
table_dfs[table] = pd.concat(list_of_df)
return table_dfs
+
+
git.ajschof.me — hosted by ajschofield — powered by cgit