diff options
| author | Ang Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local> | 2024-08-27 15:18:54 +0100 |
|---|---|---|
| committer | Ang Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local> | 2024-08-27 15:18:54 +0100 |
| commit | 22df92bcce7ec2d9e713b9609ffdd604d207e713 (patch) | |
| tree | 411ac7a2fac0895a977bd44d1e3a62e20e307092 | |
| parent | a05a3718621b2c30b4357e2b90af6da0d89c6990 (diff) | |
| download | de-project-bentley-22df92bcce7ec2d9e713b9609ffdd604d207e713.tar.gz de-project-bentley-22df92bcce7ec2d9e713b9609ffdd604d207e713.zip | |
test: refactored fact functions with test passing
| -rw-r--r-- | src/dataframes.py | 24 | ||||
| -rw-r--r-- | tests/test_dataframes.py | 9 |
2 files changed, 19 insertions, 14 deletions
diff --git a/src/dataframes.py b/src/dataframes.py index 41f39b8..1f445a4 100644 --- a/src/dataframes.py +++ b/src/dataframes.py @@ -20,10 +20,10 @@ import requests def create_fact_sales_order(dict_of_df): df_sales = dict_of_df["sales_order"] df_sales.index.name = "sales_record_id" - df_sales["created_date"] = pd.to_datetime(df_sales["created_at"],format='%Y-%m-%d') - df_sales["created_time"] = pd.to_datetime(df_sales["created_at"],format='%H-%M-%S') - df_sales["last_updated_date"] = pd.to_datetime(df_sales["last_updated"],format='%Y-%m-%d') - df_sales["last_updated_time"] = pd.to_datetime(df_sales["last_updated"],format='%H-%M-%S') + df_sales["created_date"] = pd.to_datetime(df_sales["created_at"].dt.date,format='%Y-%m-%d') + df_sales["created_time"] = df_sales["created_at"].dt.floor('s').dt.time + df_sales["last_updated_date"] = pd.to_datetime(df_sales["last_updated"].dt.date,format='%Y-%m-%d') + df_sales["last_updated_time"] = df_sales["last_updated"].dt.floor('s').dt.time df_sales['agreed_delivery_date'] = pd.to_datetime(df_sales['agreed_delivery_date'],format="%Y-%m-%d") df_sales['agreed_payment_date'] = pd.to_datetime(df_sales['agreed_payment_date'],format="%Y-%m-%d") df_sales.drop(labels=['created_at','last_updated'],axis=1,inplace=True) @@ -34,10 +34,10 @@ def create_fact_sales_order(dict_of_df): def create_fact_purchase_orders(dict_of_df): df_po = dict_of_df['purchase_order'] df_po.index.name = 'purchase_record_id' - df_po['created_date'] = pd.to_datetime(df_po['created_at'],format='%Y-%m-%d') - df_po['created_time'] = pd.to_datetime(df_po['created_at'],format='%H-%M-%S') - df_po['last_updated_date'] = pd.to_datetime(df_po['last_updated'],format='%Y-%m-%d') - df_po['last_updated_time'] = pd.to_datetime(df_po['last_updated'],format='%H-%M-%S') + df_po['created_date'] = pd.to_datetime(df_po['created_at'].dt.date,format='%Y-%m-%d') + df_po['created_time'] = df_po['created_at'].dt.floor('s').dt.time + df_po['last_updated_date'] = 
pd.to_datetime(df_po['last_updated'].dt.date,format='%Y-%m-%d') + df_po['last_updated_time'] = df_po['last_updated'].dt.floor('s').dt.time df_po['agreed_delivery_date'] = pd.to_datetime(df_po['agreed_delivery_date'],format="%Y-%m-%d") df_po['agreed_payment_date'] = pd.to_datetime(df_po['agreed_payment_date'],format="%Y-%m-%d") df_po.drop(labels=['created_at','last_updated'],axis=1,inplace=True) @@ -48,10 +48,10 @@ def create_fact_purchase_orders(dict_of_df): def create_fact_payment(dict_of_df): df_payment = dict_of_df["payment"] df_payment.index.name = "payment_record_id" - df_payment["created_date"] = pd.to_datetime(df_payment["created_at"],format='%Y-%m-%d') - df_payment["created_time"] = pd.to_datetime(df_payment["created_at"],format='%H-%M-%S') - df_payment["last_updated_date"] = pd.to_datetime(df_payment["last_updated"],format='%Y-%m-%d') - df_payment["last_updated_time"] = pd.to_datetime(df_payment["last_updated"],format='%H-%M-%S') + df_payment["created_date"] = pd.to_datetime(df_payment["created_at"].dt.date,format='%Y-%m-%d') + df_payment["created_time"] = df_payment["created_at"].dt.floor('s').dt.time + df_payment["last_updated_date"] = pd.to_datetime(df_payment["last_updated"].dt.date,format='%Y-%m-%d') + df_payment["last_updated_time"] = df_payment["last_updated"].dt.floor('s').dt.time df_payment['payment_date'] = pd.to_datetime(df_payment['payment_date'],format="%Y-%m-%d") df_payment.drop(labels=['created_at','last_updated'],axis=1,inplace=True) df_payment.reset_index(inplace=True) diff --git a/tests/test_dataframes.py b/tests/test_dataframes.py index 8f32b1d..70aefe8 100644 --- a/tests/test_dataframes.py +++ b/tests/test_dataframes.py @@ -129,7 +129,8 @@ class TestCreateDimTransaction: class TestCreateFactPayment: def test_returns_correct_columns_payment(self): - dict_df = {'payment':pd.DataFrame(data=[[dt(2020,5,17,6,15,20),dt(2020,5,20,8,19,30),1,'SE18 9QO','2020-7-16']], + dict_df = {'payment':pd.DataFrame(data=[[dt.strptime('2022-11-03 
14:20:49.962846','%Y-%m-%d %H:%M:%S.%f'), + dt.strptime('2022-12-14 16:20:49.962194','%Y-%m-%d %H:%M:%S.%f'),1,'SE18 9QO','2020-07-16']], columns=['created_at','last_updated','payment_id','some_other_id','payment_date'])} expected_cols = ['payment_record_id','created_date','created_time','last_updated_date', 'last_updated_time','payment_date','payment_id','some_other_id'] @@ -138,7 +139,11 @@ class TestCreateFactPayment: for col in list(result.columns): assert col in expected_cols for col in expected_cols: - if 'date' in col: + if '_date' in col: + print(col) assert result[col].dtype == 'datetime64[ns]' + if '_time' in col: + print(col) + assert result[col].dtype == 'O' #<< O for object
\ No newline at end of file
