about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/dataframes.py 24
-rw-r--r-- tests/test_dataframes.py 9
2 files changed, 19 insertions, 14 deletions
diff --git a/src/dataframes.py b/src/dataframes.py
index 41f39b8..1f445a4 100644
--- a/src/dataframes.py
+++ b/src/dataframes.py
@@ -20,10 +20,10 @@ import requests
def create_fact_sales_order(dict_of_df):
df_sales = dict_of_df["sales_order"]
df_sales.index.name = "sales_record_id"
- df_sales["created_date"] = pd.to_datetime(df_sales["created_at"],format='%Y-%m-%d')
- df_sales["created_time"] = pd.to_datetime(df_sales["created_at"],format='%H-%M-%S')
- df_sales["last_updated_date"] = pd.to_datetime(df_sales["last_updated"],format='%Y-%m-%d')
- df_sales["last_updated_time"] = pd.to_datetime(df_sales["last_updated"],format='%H-%M-%S')
+ df_sales["created_date"] = pd.to_datetime(df_sales["created_at"].dt.date,format='%Y-%m-%d')
+ df_sales["created_time"] = df_sales["created_at"].dt.floor('s').dt.time
+ df_sales["last_updated_date"] = pd.to_datetime(df_sales["last_updated"].dt.date,format='%Y-%m-%d')
+ df_sales["last_updated_time"] = df_sales["last_updated"].dt.floor('s').dt.time
df_sales['agreed_delivery_date'] = pd.to_datetime(df_sales['agreed_delivery_date'],format="%Y-%m-%d")
df_sales['agreed_payment_date'] = pd.to_datetime(df_sales['agreed_payment_date'],format="%Y-%m-%d")
df_sales.drop(labels=['created_at','last_updated'],axis=1,inplace=True)
@@ -34,10 +34,10 @@ def create_fact_sales_order(dict_of_df):
def create_fact_purchase_orders(dict_of_df):
df_po = dict_of_df['purchase_order']
df_po.index.name = 'purchase_record_id'
- df_po['created_date'] = pd.to_datetime(df_po['created_at'],format='%Y-%m-%d')
- df_po['created_time'] = pd.to_datetime(df_po['created_at'],format='%H-%M-%S')
- df_po['last_updated_date'] = pd.to_datetime(df_po['last_updated'],format='%Y-%m-%d')
- df_po['last_updated_time'] = pd.to_datetime(df_po['last_updated'],format='%H-%M-%S')
+ df_po['created_date'] = pd.to_datetime(df_po['created_at'].dt.date,format='%Y-%m-%d')
+ df_po['created_time'] = df_po['created_at'].dt.floor('s').dt.time
+ df_po['last_updated_date'] = pd.to_datetime(df_po['last_updated'].dt.date,format='%Y-%m-%d')
+ df_po['last_updated_time'] = df_po['last_updated'].dt.floor('s').dt.time
df_po['agreed_delivery_date'] = pd.to_datetime(df_po['agreed_delivery_date'],format="%Y-%m-%d")
df_po['agreed_payment_date'] = pd.to_datetime(df_po['agreed_payment_date'],format="%Y-%m-%d")
df_po.drop(labels=['created_at','last_updated'],axis=1,inplace=True)
@@ -48,10 +48,10 @@ def create_fact_purchase_orders(dict_of_df):
def create_fact_payment(dict_of_df):
df_payment = dict_of_df["payment"]
df_payment.index.name = "payment_record_id"
- df_payment["created_date"] = pd.to_datetime(df_payment["created_at"],format='%Y-%m-%d')
- df_payment["created_time"] = pd.to_datetime(df_payment["created_at"],format='%H-%M-%S')
- df_payment["last_updated_date"] = pd.to_datetime(df_payment["last_updated"],format='%Y-%m-%d')
- df_payment["last_updated_time"] = pd.to_datetime(df_payment["last_updated"],format='%H-%M-%S')
+ df_payment["created_date"] = pd.to_datetime(df_payment["created_at"].dt.date,format='%Y-%m-%d')
+ df_payment["created_time"] = df_payment["created_at"].dt.floor('s').dt.time
+ df_payment["last_updated_date"] = pd.to_datetime(df_payment["last_updated"].dt.date,format='%Y-%m-%d')
+ df_payment["last_updated_time"] = df_payment["last_updated"].dt.floor('s').dt.time
df_payment['payment_date'] = pd.to_datetime(df_payment['payment_date'],format="%Y-%m-%d")
df_payment.drop(labels=['created_at','last_updated'],axis=1,inplace=True)
df_payment.reset_index(inplace=True)
diff --git a/tests/test_dataframes.py b/tests/test_dataframes.py
index 8f32b1d..70aefe8 100644
--- a/tests/test_dataframes.py
+++ b/tests/test_dataframes.py
@@ -129,7 +129,8 @@ class TestCreateDimTransaction:
class TestCreateFactPayment:
def test_returns_correct_columns_payment(self):
- dict_df = {'payment':pd.DataFrame(data=[[dt(2020,5,17,6,15,20),dt(2020,5,20,8,19,30),1,'SE18 9QO','2020-7-16']],
+ dict_df = {'payment':pd.DataFrame(data=[[dt.strptime('2022-11-03 14:20:49.962846','%Y-%m-%d %H:%M:%S.%f'),
+ dt.strptime('2022-12-14 16:20:49.962194','%Y-%m-%d %H:%M:%S.%f'),1,'SE18 9QO','2020-07-16']],
columns=['created_at','last_updated','payment_id','some_other_id','payment_date'])}
expected_cols = ['payment_record_id','created_date','created_time','last_updated_date',
'last_updated_time','payment_date','payment_id','some_other_id']
@@ -138,7 +139,11 @@ class TestCreateFactPayment:
for col in list(result.columns):
assert col in expected_cols
for col in expected_cols:
- if 'date' in col:
+ if '_date' in col:
+ print(col)
assert result[col].dtype == 'datetime64[ns]'
+ if '_time' in col:
+ print(col)
+ assert result[col].dtype == 'O' #<< O for object
\ No newline at end of file
git.ajschof.me — hosted by ajschofield — powered by cgit