diff options
| -rw-r--r-- | src/fact-sales-order.py | 78 |
1 files changed, 21 insertions, 57 deletions
diff --git a/src/fact-sales-order.py b/src/fact-sales-order.py index 57e2e84..870f660 100644 --- a/src/fact-sales-order.py +++ b/src/fact-sales-order.py @@ -15,53 +15,29 @@ df_counterparty = dict_of_df[counterparty] df_sales = dict_of_df[sales] # creates the dim_design dataframe -dim_design = df_design["design_id", "design_name", "file_name", "file_location"] +dim_design = df_design.loc[:, "design_id", "design_name", "file_name", "file_location"] # creates the dim_staff dataframe -staff_department = pd.merge(df_staff, df_department, on="department_id", how="outer") -dim_staff = staff_department[ - "staff_id", - "first_name", - "last_name", - "department_name", - "location", - "email_address", -] +staff_department = pd.merge(df_staff, df_department, on='department_id', how="outer") +dim_staff = staff_department.loc[:, 'staff_id', 'first_name', 'last_name', 'department_name', 'location', 'email_address'] # creates the dim_currency dataframe -# currency names currently hardcoded and not taken from database, is this viable/how else to do this? -d = { - "currency_id": [1, 2, 3], - "currency_code": ["GBP", "USD", "EUR"], - "currency_name": ["Pound", "US Dollar", "Euro"], +# Using .map to add currency_name column and link it to the currency code +dim_currency = df_currency.loc[:, "currency_id", "currency_code"] +mappings = { + "GBP": "Pound", + "USD": "US Dollar", + "EUR": "Euro" } -currency_names = pd.DataFrame(data=d) -join_currency = pd.merge(df_currency, currency_names, on="currency_name", how="outer") -dim_currency = join_currency["currency_id", "currency_code", "currency_name"] +dim_currency["currency_name"] = dim_currency["currency_code"].map(mappings) -# Using .map to add currency_name column and link it to the currency code -# dim_currency = df_currency["currency_id", "currency_code"] -# mappings = { -# "GBP": "Pound", -# "USD": "US Dollar", -# "EUR": "Euro" -# } -# dim_currency["currency_name"] = dim_currency["currency_code"].map(mappings) # creates the dim_location dataframe # need to change address id to location id "dim_location dataframe: (location_id, address_line_1, address_line_2, district, city, postal code, country, phone)" df_address.rename(columns={"address_id": "location_id"}) -dim_location = df_address[ - "location_id", - "address_line_1", - "address_line_2", - "district", - "city", - "postal_code" "country", - "phone", -] +dim_location = df_address.loc[:, "location_id", "address_line_1", "address_line_2", "district", "city", "postal_code" "country", "phone"] # creates the dim_counterparty dataframe counterparty_address = pd.merge( @@ -83,20 +59,12 @@ counterparty_address.rename( } ) -dim_counterparty = df_counterparty[ - "counterparty_id", - "counterparty_legal_name", - "counterparty_legal_address_line_1", - "counterparty_legal_address_line_2", - "counterparty_legal_district", - "counterpart_legal_city", - "counterparty_legal_postal_code", - "counterparty_legal_country", - "counterparty_legal_phone_number", -] +dim_counterparty = df_counterparty.loc[:, "counterparty_id", "counterparty_legal_name", "counterparty_legal_address_line_1", + "counterparty_legal_address_line_2", "counterparty_legal_district", "counterpart_legal_city", + "counterparty_legal_postal_code", "counterparty_legal_country", "counterparty_legal_phone_number"] # creates the dim_date dataframe -df_sales = df_sales["agreed_delivery_date"] +df_sales = df_sales.loc[:, "agreed_delivery_date"] df_sales["agreed_delivery_date"] = pd.to_datetime["agreed_delivery_date"] df_sales["year"] = df_sales["agreed_delivery_date"].dt.year df_sales["month"] = df_sales["agreed_delivery_date"].dt.month @@ -106,16 +74,12 @@ df_sales["day_name"] = df_sales["agreed_delivery_date"].dt.day_name() df_sales["month_name"] = df_sales["agreed_delivery_date"].dt.month_name() df_sales["quarter"] = df_sales["agreed_delivery_date"].dt.quarter() -dim_date = [ - "date_id", - "year", - "month", - "day", - "day_of_week", - "day_name", - "month_name", - "quarter", -] # series.dt.quarter() +# repeat ln 52 - 60 for each column +# merge dataframes into one dataframe +# remove duplicates + + +dim_date = ["date_id", "year", "month", "day", "day_of_week", "day_name", "month_name", "quarter"] #series.dt.quarter() # TO DO: |
