From 0c02bd3636ed8815aadf73685c20f8c76a073c99 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Wed, 21 Aug 2024 15:09:58 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in 20a3bd8 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/85 --- src/fact-sales-order.py | 86 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 68 insertions(+), 18 deletions(-) diff --git a/src/fact-sales-order.py b/src/fact-sales-order.py index 30c958f..399e435 100644 --- a/src/fact-sales-order.py +++ b/src/fact-sales-order.py @@ -1,7 +1,8 @@ import pandas as pd from src.transform_lambda import get_dataframes -dict_of_df = get_dataframes() # {"design": "design dataframe", "address": "address dataframe", ....} +# {"design": "design dataframe", "address": "address dataframe", ....} +dict_of_df = get_dataframes() # iterates through each dataframe in the list of dataframes and assigns them to a variable @@ -17,12 +18,23 @@ df_sales = dict_of_df[sales] dim_design = df_design["design_id", "design_name", "file_name", "file_location"] # creates the dim_staff dataframe -staff_department = pd.merge(df_staff, df_department, on='department_id', how="outer") -dim_staff = staff_department['staff_id', 'first_name', 'last_name', 'department_name', 'location', 'email_address'] +staff_department = pd.merge(df_staff, df_department, on="department_id", how="outer") +dim_staff = staff_department[ + "staff_id", + "first_name", + "last_name", + "department_name", + "location", + "email_address", +] # creates the dim_currency dataframe -# currency names currently hardcoded and not taken from database, is this viable/how else to do this? -d = {"currency_id": [1, 2, 3], "currency_code": ["GBP", "USD", "EUR"], "currency_name": ["Pound", "US Dollar", "Euro"]} +# currency names currently hardcoded and not taken from database, is this viable/how else to do this? +d = { + "currency_id": [1, 2, 3], + "currency_code": ["GBP", "USD", "EUR"], + "currency_name": ["Pound", "US Dollar", "Euro"], +} currency_names = pd.DataFrame(data=d) join_currency = pd.merge(df_currency, currency_names, on="currency_name", how="outer") dim_currency = join_currency["currency_id", "currency_code", "currency_name"] @@ -37,22 +49,51 @@ dim_currency = join_currency["currency_id", "currency_code", "currency_name"] # dim_currency["currency_name"] = dim_currency["currency_code"].map(mappings) - # creates the dim_location dataframe -# need to change address id to location id +# need to change address id to location id "dim_location dataframe: (location_id, address_line_1, address_line_2, district, city, postal code, country, phone)" df_address.rename(columns={"address_id": "location_id"}) -dim_location = df_address["location_id", "address_line_1", "address_line_2", "district", "city", "postal_code" "country", "phone"] +dim_location = df_address[ + "location_id", + "address_line_1", + "address_line_2", + "district", + "city", + "postal_code" "country", + "phone", +] # creates the dim_counterparty dataframe -counterparty_address = pd.merge(df_counterparty, df_address, left_on="legal_address_id", right_on='address_id', how="outer") -counterparty_address.rename(columns={"address_line_1": "counterparty_legal_address_line_1", "address_line_2": "counterparty_legal_address_line_2", - "district": "counterparty_legal_district", "city": "counterparty_legal_city", "postal_code": "counterparty_postal_code", - "country": "counterparty_legal_country", "phone": "counterparty_legal_phone_number"}) - -dim_counterparty = df_counterparty["counterparty_id", "counterparty_legal_name", "counterparty_legal_address_line_1", - "counterparty_legal_address_line_2", "counterparty_legal_district", "counterpart_legal_city", - "counterparty_legal_postal_code", "counterparty_legal_country", "counterparty_legal_phone_number"] +counterparty_address = pd.merge( + df_counterparty, + df_address, + left_on="legal_address_id", + right_on="address_id", + how="outer", +) +counterparty_address.rename( + columns={ + "address_line_1": "counterparty_legal_address_line_1", + "address_line_2": "counterparty_legal_address_line_2", + "district": "counterparty_legal_district", + "city": "counterparty_legal_city", + "postal_code": "counterparty_postal_code", + "country": "counterparty_legal_country", + "phone": "counterparty_legal_phone_number", + } +) + +dim_counterparty = df_counterparty[ + "counterparty_id", + "counterparty_legal_name", + "counterparty_legal_address_line_1", + "counterparty_legal_address_line_2", + "counterparty_legal_district", + "counterpart_legal_city", + "counterparty_legal_postal_code", + "counterparty_legal_country", + "counterparty_legal_phone_number", +] # creates the dim_date dataframe df_sales = df_sales["agreed_delivery_date"] @@ -65,8 +106,17 @@ df_sales["day_name"] = df_sales["agreed_delivery_date"].dt.day_name() df_sales["month_name"] = df_sales["agreed_delivery_date"].dt.month_name() df_sales["quarter"] = df_sales["agreed_delivery_date"].dt.quarter() -dim_date = ["date_id", "year", "month", "day", "day_of_week", "day_name", "month_name", "quarter"] #series.dt.quarter() +dim_date = [ + "date_id", + "year", + "month", + "day", + "day_of_week", + "day_name", + "month_name", + "quarter", +] # series.dt.quarter() # TO DO: -# fact_sales_order \ No newline at end of file +# fact_sales_order -- cgit v1.2.3 From 5b2b4864eae129e112e70d093eb66498d7de401e Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Wed, 21 Aug 2024 17:11:57 +0100 Subject: wip: fact_purchase_order schema --- src/fact-purchase-table.py | 34 ++++++++++++++++++++++++++++++++++ src/fact-sales-order.py | 2 +- src/transform_lambda.py | 4 ++-- 3 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 src/fact-purchase-table.py diff --git a/src/fact-purchase-table.py b/src/fact-purchase-table.py new file mode 100644 index 0000000..53c0148 --- /dev/null +++ b/src/fact-purchase-table.py @@ -0,0 +1,34 @@ +from src.transform_lambda import read_from_s3_subfolder_to_df, tables +from src.extract_lambda import extract_bucket +import json +import boto3 +import re +import pandas as pd + + +dict_of_df = read_from_s3_subfolder_to_df(tables, extract_bucket(), client=boto3.client("s3")) + + +# iterates through each dataframe in the list of dataframes and assigns them to a variable +df_staff = dict_of_df['staff'] ##no change +df_currency = dict_of_df['currency'] ##scraping API +df_counterparty = dict_of_df['counterparty'] +df_address = dict_of_df['address'] +df_department = dict_of_df['department'] +df_purchase_order = dict_of_df['purchase_order'] + +## dim_staff table is the same across the schemas (no change) + +## dim_counterparty table + +## dim_location df_currency --> drops 2 columns +dim_location = df_address.drop(labels=['created_at', 'last_updated'], axis=1).rename(columns={'address_id': 'location_id'}) + +## dim_counterparty +df_prefixed_address = df_address.add_prefix('counterparty_legal_', axis=1) +pd.merge(df_counterparty, + df_prefixed_address, + left_on="legal_address_id", + right_on="address_id", + how="outer") + diff --git a/src/fact-sales-order.py b/src/fact-sales-order.py index 399e435..57e2e84 100644 --- a/src/fact-sales-order.py +++ b/src/fact-sales-order.py @@ -69,7 +69,7 @@ counterparty_address = pd.merge( df_address, left_on="legal_address_id", right_on="address_id", - how="outer", + how="outer" ) counterparty_address.rename( columns={ diff --git a/src/transform_lambda.py b/src/transform_lambda.py index 9238180..920a24f 100644 --- a/src/transform_lambda.py +++ b/src/transform_lambda.py @@ -1,8 +1,6 @@ import json import boto3 import re -import io -from io import StringIO import pandas as pd @@ -35,3 +33,5 @@ def read_from_s3_subfolder_to_df(tables, bucket, client=boto3.client("s3")): list_of_df = [pd.read_csv(key) for key in list_of_keys] table_dfs[table] = pd.concat(list_of_df) return table_dfs + + -- cgit v1.2.3