From 0c02bd3636ed8815aadf73685c20f8c76a073c99 Mon Sep 17 00:00:00 2001
From: "deepsource-autofix[bot]"
 <62050782+deepsource-autofix[bot]@users.noreply.github.com>
Date: Wed, 21 Aug 2024 15:09:58 +0000
Subject: style: format code with Autopep8, Black and Ruff Formatter

This commit fixes the style issues introduced in 20a3bd8 according to the output
from Autopep8, Black and Ruff Formatter.

Details: https://github.com/ajschofield/de-project-bentley/pull/85
---
 src/fact-sales-order.py | 86 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 68 insertions(+), 18 deletions(-)

diff --git a/src/fact-sales-order.py b/src/fact-sales-order.py
index 30c958f..399e435 100644
--- a/src/fact-sales-order.py
+++ b/src/fact-sales-order.py
@@ -1,7 +1,8 @@
 import pandas as pd
 from src.transform_lambda import get_dataframes
 
-dict_of_df = get_dataframes()  # {"design": "design dataframe", "address": "address dataframe", ....}
+# {"design": "design dataframe", "address": "address dataframe", ....}
+dict_of_df = get_dataframes()
 
 
 # iterates through each dataframe in the list of dataframes and assigns them to a variable
@@ -17,12 +18,23 @@ df_sales = dict_of_df[sales]
 dim_design = df_design["design_id", "design_name", "file_name", "file_location"]
 
 # creates the dim_staff dataframe
-staff_department = pd.merge(df_staff, df_department, on='department_id', how="outer")
-dim_staff = staff_department['staff_id', 'first_name', 'last_name', 'department_name', 'location', 'email_address']
+staff_department = pd.merge(df_staff, df_department, on="department_id", how="outer")
+dim_staff = staff_department[
+    "staff_id",
+    "first_name",
+    "last_name",
+    "department_name",
+    "location",
+    "email_address",
+]
 
 # creates the dim_currency dataframe
-# currency names currently hardcoded and not taken from database, is this viable/how else to do this? 
-d = {"currency_id": [1, 2, 3], "currency_code": ["GBP", "USD", "EUR"], "currency_name": ["Pound", "US Dollar", "Euro"]}
+# currency names currently hardcoded and not taken from database, is this viable/how else to do this?
+d = {
+    "currency_id": [1, 2, 3],
+    "currency_code": ["GBP", "USD", "EUR"],
+    "currency_name": ["Pound", "US Dollar", "Euro"],
+}
 currency_names = pd.DataFrame(data=d)
 join_currency = pd.merge(df_currency, currency_names, on="currency_name", how="outer")
 dim_currency = join_currency["currency_id", "currency_code", "currency_name"]
@@ -37,22 +49,51 @@ dim_currency = join_currency["currency_id", "currency_code", "currency_name"]
 # dim_currency["currency_name"] = dim_currency["currency_code"].map(mappings)
 
 
-
 # creates the dim_location dataframe
-# need to change address id to location id 
+# need to change address id to location id
 "dim_location dataframe: (location_id, address_line_1, address_line_2, district, city, postal code, country, phone)"
 df_address.rename(columns={"address_id": "location_id"})
-dim_location = df_address["location_id", "address_line_1", "address_line_2", "district", "city", "postal_code" "country", "phone"]
+dim_location = df_address[
+    "location_id",
+    "address_line_1",
+    "address_line_2",
+    "district",
+    "city",
+    "postal_code" "country",
+    "phone",
+]
 
 # creates the dim_counterparty dataframe
-counterparty_address = pd.merge(df_counterparty, df_address, left_on="legal_address_id", right_on='address_id', how="outer")
-counterparty_address.rename(columns={"address_line_1": "counterparty_legal_address_line_1", "address_line_2": "counterparty_legal_address_line_2",
-                                     "district": "counterparty_legal_district", "city": "counterparty_legal_city", "postal_code": "counterparty_postal_code",
-                                     "country": "counterparty_legal_country", "phone": "counterparty_legal_phone_number"})
-
-dim_counterparty = df_counterparty["counterparty_id", "counterparty_legal_name", "counterparty_legal_address_line_1",
-                                   "counterparty_legal_address_line_2", "counterparty_legal_district", "counterpart_legal_city",
-                                   "counterparty_legal_postal_code", "counterparty_legal_country", "counterparty_legal_phone_number"]
+counterparty_address = pd.merge(
+    df_counterparty,
+    df_address,
+    left_on="legal_address_id",
+    right_on="address_id",
+    how="outer",
+)
+counterparty_address.rename(
+    columns={
+        "address_line_1": "counterparty_legal_address_line_1",
+        "address_line_2": "counterparty_legal_address_line_2",
+        "district": "counterparty_legal_district",
+        "city": "counterparty_legal_city",
+        "postal_code": "counterparty_postal_code",
+        "country": "counterparty_legal_country",
+        "phone": "counterparty_legal_phone_number",
+    }
+)
+
+dim_counterparty = df_counterparty[
+    "counterparty_id",
+    "counterparty_legal_name",
+    "counterparty_legal_address_line_1",
+    "counterparty_legal_address_line_2",
+    "counterparty_legal_district",
+    "counterpart_legal_city",
+    "counterparty_legal_postal_code",
+    "counterparty_legal_country",
+    "counterparty_legal_phone_number",
+]
 
 # creates the dim_date dataframe
 df_sales = df_sales["agreed_delivery_date"]
@@ -65,8 +106,17 @@ df_sales["day_name"] = df_sales["agreed_delivery_date"].dt.day_name()
 df_sales["month_name"] = df_sales["agreed_delivery_date"].dt.month_name()
 df_sales["quarter"] = df_sales["agreed_delivery_date"].dt.quarter()
 
-dim_date = ["date_id", "year", "month", "day", "day_of_week", "day_name", "month_name", "quarter"]   #series.dt.quarter()
+dim_date = [
+    "date_id",
+    "year",
+    "month",
+    "day",
+    "day_of_week",
+    "day_name",
+    "month_name",
+    "quarter",
+]  # series.dt.quarter()
 
 
 # TO DO:
-# fact_sales_order
\ No newline at end of file
+# fact_sales_order
-- 
cgit v1.2.3


From 5b2b4864eae129e112e70d093eb66498d7de401e Mon Sep 17 00:00:00 2001
From: lian-manonog <lian.manonog@gmail.com>
Date: Wed, 21 Aug 2024 17:11:57 +0100
Subject: wip: fact_purchase_order schema

---
 src/fact-purchase-table.py | 34 ++++++++++++++++++++++++++++++++++
 src/fact-sales-order.py    |  2 +-
 src/transform_lambda.py    |  4 ++--
 3 files changed, 37 insertions(+), 3 deletions(-)
 create mode 100644 src/fact-purchase-table.py

diff --git a/src/fact-purchase-table.py b/src/fact-purchase-table.py
new file mode 100644
index 0000000..53c0148
--- /dev/null
+++ b/src/fact-purchase-table.py
@@ -0,0 +1,34 @@
+from src.transform_lambda import read_from_s3_subfolder_to_df, tables
+from src.extract_lambda import extract_bucket
+import json
+import boto3
+import re
+import pandas as pd
+
+
+dict_of_df = read_from_s3_subfolder_to_df(tables, extract_bucket(), client=boto3.client("s3"))
+
+
+# iterates through each dataframe in the list of dataframes and assigns them to a variable
+df_staff = dict_of_df['staff'] ##no change
+df_currency = dict_of_df['currency'] ##scraping API 
+df_counterparty = dict_of_df['counterparty']
+df_address = dict_of_df['address']
+df_department = dict_of_df['department']
+df_purchase_order = dict_of_df['purchase_order']
+
+## dim_staff table is the same across the schemas (no change)
+
+## dim_counterparty table
+
+## dim_location df_currency --> drops 2 columns
+dim_location = df_address.drop(labels=['created_at', 'last_updated'], axis=1).rename(columns={'address_id': 'location_id'})
+
+## dim_counterparty 
+df_prefixed_address = df_address.add_prefix('counterparty_legal_', axis=1) 
+pd.merge(df_counterparty, 
+         df_prefixed_address, 
+         left_on="legal_address_id", 
+         right_on="address_id", 
+         how="outer")
+
diff --git a/src/fact-sales-order.py b/src/fact-sales-order.py
index 399e435..57e2e84 100644
--- a/src/fact-sales-order.py
+++ b/src/fact-sales-order.py
@@ -69,7 +69,7 @@ counterparty_address = pd.merge(
     df_address,
     left_on="legal_address_id",
     right_on="address_id",
-    how="outer",
+    how="outer"
 )
 counterparty_address.rename(
     columns={
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index 9238180..920a24f 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,8 +1,6 @@
 import json
 import boto3
 import re
-import io
-from io import StringIO
 import pandas as pd
 
 
@@ -35,3 +33,5 @@ def read_from_s3_subfolder_to_df(tables, bucket, client=boto3.client("s3")):
         list_of_df = [pd.read_csv(key) for key in list_of_keys]
         table_dfs[table] = pd.concat(list_of_df)
     return table_dfs
+
+
-- 
cgit v1.2.3