aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/fact-sales-order.py78
1 files changed, 21 insertions, 57 deletions
diff --git a/src/fact-sales-order.py b/src/fact-sales-order.py
index 57e2e84..870f660 100644
--- a/src/fact-sales-order.py
+++ b/src/fact-sales-order.py
@@ -15,53 +15,29 @@ df_counterparty = dict_of_df[counterparty]
df_sales = dict_of_df[sales]
# creates the dim_design dataframe
-dim_design = df_design["design_id", "design_name", "file_name", "file_location"]
+dim_design = df_design.loc[:, "design_id", "design_name", "file_name", "file_location"]
# creates the dim_staff dataframe
-staff_department = pd.merge(df_staff, df_department, on="department_id", how="outer")
-dim_staff = staff_department[
- "staff_id",
- "first_name",
- "last_name",
- "department_name",
- "location",
- "email_address",
-]
+staff_department = pd.merge(df_staff, df_department, on='department_id', how="outer")
+dim_staff = staff_department.loc[:, 'staff_id', 'first_name', 'last_name', 'department_name', 'location', 'email_address']
# creates the dim_currency dataframe
-# currency names currently hardcoded and not taken from database, is this viable/how else to do this?
-d = {
- "currency_id": [1, 2, 3],
- "currency_code": ["GBP", "USD", "EUR"],
- "currency_name": ["Pound", "US Dollar", "Euro"],
+# Using .map to add currency_name column and link it to the currency code
+dim_currency = df_currency.loc[:, "currency_id", "currency_code"]
+mappings = {
+ "GBP": "Pound",
+ "USD": "US Dollar",
+ "EUR": "Euro"
}
-currency_names = pd.DataFrame(data=d)
-join_currency = pd.merge(df_currency, currency_names, on="currency_name", how="outer")
-dim_currency = join_currency["currency_id", "currency_code", "currency_name"]
+dim_currency["currency_name"] = dim_currency["currency_code"].map(mappings)
-# Using .map to add currency_name column and link it to the currency code
-# dim_currency = df_currency["currency_id", "currency_code"]
-# mappings = {
-# "GBP": "Pound",
-# "USD": "US Dollar",
-# "EUR": "Euro"
-# }
-# dim_currency["currency_name"] = dim_currency["currency_code"].map(mappings)
# creates the dim_location dataframe
# need to change address id to location id
"dim_location dataframe: (location_id, address_line_1, address_line_2, district, city, postal code, country, phone)"
df_address.rename(columns={"address_id": "location_id"})
-dim_location = df_address[
- "location_id",
- "address_line_1",
- "address_line_2",
- "district",
- "city",
- "postal_code" "country",
- "phone",
-]
+dim_location = df_address.loc[:, "location_id", "address_line_1", "address_line_2", "district", "city", "postal_code" "country", "phone"]
# creates the dim_counterparty dataframe
counterparty_address = pd.merge(
@@ -83,20 +59,12 @@ counterparty_address.rename(
}
)
-dim_counterparty = df_counterparty[
- "counterparty_id",
- "counterparty_legal_name",
- "counterparty_legal_address_line_1",
- "counterparty_legal_address_line_2",
- "counterparty_legal_district",
- "counterpart_legal_city",
- "counterparty_legal_postal_code",
- "counterparty_legal_country",
- "counterparty_legal_phone_number",
-]
+dim_counterparty = df_counterparty.loc[:, "counterparty_id", "counterparty_legal_name", "counterparty_legal_address_line_1",
+ "counterparty_legal_address_line_2", "counterparty_legal_district", "counterpart_legal_city",
+ "counterparty_legal_postal_code", "counterparty_legal_country", "counterparty_legal_phone_number"]
# creates the dim_date dataframe
-df_sales = df_sales["agreed_delivery_date"]
+df_sales = df_sales.loc[:, "agreed_delivery_date"]
df_sales["agreed_delivery_date"] = pd.to_datetime["agreed_delivery_date"]
df_sales["year"] = df_sales["agreed_delivery_date"].dt.year
df_sales["month"] = df_sales["agreed_delivery_date"].dt.month
@@ -106,16 +74,12 @@ df_sales["day_name"] = df_sales["agreed_delivery_date"].dt.day_name()
df_sales["month_name"] = df_sales["agreed_delivery_date"].dt.month_name()
df_sales["quarter"] = df_sales["agreed_delivery_date"].dt.quarter()
-dim_date = [
- "date_id",
- "year",
- "month",
- "day",
- "day_of_week",
- "day_name",
- "month_name",
- "quarter",
-] # series.dt.quarter()
+# repeat ln 52 - 60 for each column
+# merge dataframes into one dataframe
+# remove duplicates
+
+
+dim_date = ["date_id", "year", "month", "day", "day_of_week", "day_name", "month_name", "quarter"] #series.dt.quarter()
# TO DO:
git.ajschof.me — hosted by ajschofield — powered by cgit