diff options
| -rw-r--r-- | src/dataframes.py | 111 | ||||
| -rw-r--r-- | tests/test_dataframes.py (renamed from tests/test_fact_sales_order.py) | 41 |
2 files changed, 95 insertions, 57 deletions
diff --git a/src/dataframes.py b/src/dataframes.py index ab53063..e60123a 100644 --- a/src/dataframes.py +++ b/src/dataframes.py @@ -16,85 +16,78 @@ import requests # dim_counterparty +# no test, same as fact_payment def create_fact_sales_order(dict_of_df): df_sales = dict_of_df["sales_order"] df_sales.index.name = "sales_record_id" - df_sales["created_date"] = pd.to_datetime(df_sales["created_at"]).dt.date - df_sales["created_time"] = pd.to_datetime(df_sales["created_at"]).dt.time - df_sales["last_updated_date"] = pd.to_datetime(df_sales["last_updated"]).dt.date - df_sales["last_updated_time"] = pd.to_datetime(df_sales["last_updated"]).dt.time - fact_sales_order = df_sales.loc[ - :, - [ - "sales_record_id", - "sales_order_id", - "created_date", - "created_time", - "last_updated_date", - "last_updated_time", - "sales_staff_id", - "counterparty_id", - "units_sold", - "unit_price", - "currency_id", - "design_id", - "agreed_payment_date", - "agreed_delivery_date", - "agreed_delivery_location_id", - ], - ] - return fact_sales_order + df_sales["created_date"] = pd.to_datetime(df_sales["created_at"], format="%Y-%m-%d") + df_sales["created_time"] = pd.to_datetime(df_sales["created_at"], format="%H-%M-%S") + df_sales["last_updated_date"] = pd.to_datetime( + df_sales["last_updated"], format="%Y-%m-%d" + ) + df_sales["last_updated_time"] = pd.to_datetime( + df_sales["last_updated"], format="%H-%M-%S" + ) + df_sales["agreed_delivery_date"] = pd.to_datetime( + df_sales["agreed_delivery_date"], format="%Y-%m-%d" + ) + df_sales["agreed_payment_date"] = pd.to_datetime( + df_sales["agreed_payment_date"], format="%Y-%m-%d" + ) + df_sales.drop(labels=["created_at", "last_updated"], axis=1, inplace=True) + df_sales.reset_index(inplace=True) + return df_sales -# fact_purchase_order from purchase_order +# no test, same as fact_payment def create_fact_purchase_orders(dict_of_df): df_po = dict_of_df["purchase_order"] df_po.index.name = "purchase_record_id" - df_po["created_date"] = df_po["created_at"].date() - df_po["created_time"] = df_po["created_at"].dt.time - df_po["last_updated_date"] = df_po["last_updated_at"].date() - df_po["last_updated_time"] = df_po["last_updated_at"].dt.time + df_po["created_date"] = pd.to_datetime(df_po["created_at"], format="%Y-%m-%d") + df_po["created_time"] = pd.to_datetime(df_po["created_at"], format="%H-%M-%S") + df_po["last_updated_date"] = pd.to_datetime( + df_po["last_updated"], format="%Y-%m-%d" + ) + df_po["last_updated_time"] = pd.to_datetime( + df_po["last_updated"], format="%H-%M-%S" + ) df_po["agreed_delivery_date"] = pd.to_datetime( df_po["agreed_delivery_date"], format="%Y-%m-%d" ) df_po["agreed_payment_date"] = pd.to_datetime( df_po["agreed_payment_date"], format="%Y-%m-%d" ) - df_po.drop(labels=["created_at", "last_updated_at"], axis=1, inplace=True) + df_po.drop(labels=["created_at", "last_updated"], axis=1, inplace=True) + df_po.reset_index(inplace=True) return df_po +# test passed + + def create_fact_payment(dict_of_df): df_payment = dict_of_df["payment"] df_payment.index.name = "payment_record_id" - df_payment["created_date"] = df_payment["created_at"].date() - df_payment["created_time"] = df_payment["created_at"].time - df_payment["last_updated_date"] = df_payment["last_updated"].date() - df_payment["last_updated_time"] = df_payment["last_updated"].time + df_payment["created_date"] = pd.to_datetime( + df_payment["created_at"], format="%Y-%m-%d" + ) + df_payment["created_time"] = pd.to_datetime( + df_payment["created_at"], format="%H-%M-%S" + ) + df_payment["last_updated_date"] = pd.to_datetime( + df_payment["last_updated"], format="%Y-%m-%d" + ) + df_payment["last_updated_time"] = pd.to_datetime( + df_payment["last_updated"], format="%H-%M-%S" + ) df_payment["payment_date"] = pd.to_datetime( df_payment["payment_date"], format="%Y-%m-%d" ) - fact_payment = df_payment.loc[ - :, - [ - "payment_record_id", - "payment_id", - "created_date", - "created_time", - "last_updated_date", - "last_updated_time", - "transaction_id", - "counterparty_id", - "payment_amount", - "currency_id", - "payment_type_id", - "paid", - "payment_date", - ], - ] - return fact_payment + df_payment.drop(labels=["created_at", "last_updated"], axis=1, inplace=True) + df_payment.reset_index(inplace=True) + return df_payment # test passed @@ -108,6 +101,8 @@ def create_dim_transaction(dict_of_df): # test passed + + def create_dim_location(dict_of_df): df_loc = ( dict_of_df["address"] @@ -143,11 +138,11 @@ def create_dim_date(dict_of_df): create_fact_purchase_orders(dict_of_df), create_fact_sales_order(dict_of_df), ] - date_col_names = [ - col_name for col_name in list(fact_dfs[0].columns) if "date" in col_name - ] list_of_date_columns = [] for df in fact_dfs: + date_col_names = [ + col_name for col_name in list(df.columns) if "date" in col_name + ] for col in date_col_names: list_of_date_columns.append(df[col]) sr_date = pd.array(pd.concat(list_of_date_columns), dtype="datetime64[ns]") @@ -164,6 +159,8 @@ def create_dim_date(dict_of_df): # tests passed + + def scrape_currency_names(): response = requests.get("https://www.xe.com/currency/").content soup = BeautifulSoup(response, "html.parser") diff --git a/tests/test_fact_sales_order.py b/tests/test_dataframes.py index a245379..584ab27 100644 --- a/tests/test_fact_sales_order.py +++ b/tests/test_dataframes.py @@ -244,3 +244,44 @@ class TestCreateDimTransaction: } result = create_dim_transaction(dict_df) assert list(result.columns) == ["transaction_id", "some_other_id"] + + +class TestCreateFactPayment: + def test_returns_correct_columns_payment(self): + dict_df = { + "payment": pd.DataFrame( + data=[ + [ + dt(2020, 5, 17, 6, 15, 20), + dt(2020, 5, 20, 8, 19, 30), + 1, + "SE18 9QO", + "2020-7-16", + ] + ], + columns=[ + "created_at", + "last_updated", + "payment_id", + "some_other_id", + "payment_date", + ], + ) + } + expected_cols = [ + "payment_record_id", + "created_date", + "created_time", + "last_updated_date", + "last_updated_time", + "payment_date", + "payment_id", + "some_other_id", + ] + result = create_fact_payment(dict_df) + assert isinstance(result, pd.DataFrame) + for col in list(result.columns): + assert col in expected_cols + for col in expected_cols: + if "date" in col: + assert result[col].dtype == "datetime64[ns]" |
