aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/fact-purchase-table.py34
-rw-r--r--src/fact-sales-order.py29
-rw-r--r--src/transform_lambda.py4
3 files changed, 58 insertions, 9 deletions
diff --git a/src/fact-purchase-table.py b/src/fact-purchase-table.py
new file mode 100644
index 0000000..53c0148
--- /dev/null
+++ b/src/fact-purchase-table.py
@@ -0,0 +1,34 @@
+from src.transform_lambda import read_from_s3_subfolder_to_df, tables
+from src.extract_lambda import extract_bucket
+import json
+import boto3
+import re
+import pandas as pd
+
+
+dict_of_df = read_from_s3_subfolder_to_df(tables, extract_bucket(), client=boto3.client("s3"))
+
+
+# iterates through each dataframe in the list of dataframes and assigns them to a variable
+df_staff = dict_of_df['staff'] ##no change
+df_currency = dict_of_df['currency'] ##scraping API
+df_counterparty = dict_of_df['counterparty']
+df_address = dict_of_df['address']
+df_department = dict_of_df['department']
+df_purchase_order = dict_of_df['purchase_order']
+
+## dim_staff table is the same across the schemas (no change)
+
+## dim_counterparty table
+
+## dim_location df_currency --> drops 2 columns
+dim_location = df_address.drop(labels=['created_at', 'last_updated'], axis=1).rename(columns={'address_id': 'location_id'})
+
+## dim_counterparty
+df_prefixed_address = df_address.add_prefix('counterparty_legal_', axis=1)
+pd.merge(df_counterparty,
+ df_prefixed_address,
+ left_on="legal_address_id",
+ right_on="address_id",
+ how="outer")
+
diff --git a/src/fact-sales-order.py b/src/fact-sales-order.py
index ef18f02..870f660 100644
--- a/src/fact-sales-order.py
+++ b/src/fact-sales-order.py
@@ -1,7 +1,8 @@
import pandas as pd
from src.transform_lambda import get_dataframes
-dict_of_df = get_dataframes() # {"design": "design dataframe", "address": "address dataframe", ....}
+# {"design": "design dataframe", "address": "address dataframe", ....}
+dict_of_df = get_dataframes()
# iterates through each dataframe in the list of dataframes and assigns them to a variable
@@ -33,16 +34,30 @@ dim_currency["currency_name"] = dim_currency["currency_code"].map(mappings)
# creates the dim_location dataframe
-# need to change address id to location id
+# need to change address id to location id
"dim_location dataframe: (location_id, address_line_1, address_line_2, district, city, postal code, country, phone)"
df_address.rename(columns={"address_id": "location_id"})
dim_location = df_address.loc[:, "location_id", "address_line_1", "address_line_2", "district", "city", "postal_code" "country", "phone"]
# creates the dim_counterparty dataframe
-counterparty_address = pd.merge(df_counterparty, df_address, left_on="legal_address_id", right_on='address_id', how="outer")
-counterparty_address.rename(columns={"address_line_1": "counterparty_legal_address_line_1", "address_line_2": "counterparty_legal_address_line_2",
- "district": "counterparty_legal_district", "city": "counterparty_legal_city", "postal_code": "counterparty_postal_code",
- "country": "counterparty_legal_country", "phone": "counterparty_legal_phone_number"})
+counterparty_address = pd.merge(
+ df_counterparty,
+ df_address,
+ left_on="legal_address_id",
+ right_on="address_id",
+ how="outer"
+)
+counterparty_address.rename(
+ columns={
+ "address_line_1": "counterparty_legal_address_line_1",
+ "address_line_2": "counterparty_legal_address_line_2",
+ "district": "counterparty_legal_district",
+ "city": "counterparty_legal_city",
+ "postal_code": "counterparty_postal_code",
+ "country": "counterparty_legal_country",
+ "phone": "counterparty_legal_phone_number",
+ }
+)
dim_counterparty = df_counterparty.loc[:, "counterparty_id", "counterparty_legal_name", "counterparty_legal_address_line_1",
"counterparty_legal_address_line_2", "counterparty_legal_district", "counterpart_legal_city",
@@ -68,4 +83,4 @@ dim_date = ["date_id", "year", "month", "day", "day_of_week", "day_name", "month
# TO DO:
-# fact_sales_order \ No newline at end of file
+# fact_sales_order
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index 9238180..920a24f 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,8 +1,6 @@
import json
import boto3
import re
-import io
-from io import StringIO
import pandas as pd
@@ -35,3 +33,5 @@ def read_from_s3_subfolder_to_df(tables, bucket, client=boto3.client("s3")):
list_of_df = [pd.read_csv(key) for key in list_of_keys]
table_dfs[table] = pd.concat(list_of_df)
return table_dfs
+
+
git.ajschof.me — hosted by ajschofield — powered by cgit