diff options
| -rw-r--r-- | src/dataframes.py | 85 | ||||
| -rw-r--r-- | tests/test_dataframes.py | 20 |
2 files changed, 33 insertions, 72 deletions
diff --git a/src/dataframes.py b/src/dataframes.py index 43facd6..ab32fff 100644 --- a/src/dataframes.py +++ b/src/dataframes.py @@ -20,9 +20,7 @@ import requests def create_fact_sales_order(dict_of_df): df_sales = dict_of_df["sales_order"] df_sales.index.name = "sales_record_id" - - -<< << << < HEAD + df_sales["created_date"] = df_sales["created_at"].astype( "datetime64[ns]").dt.date df_sales["created_time"] = ( @@ -33,17 +31,6 @@ def create_fact_sales_order(dict_of_df): ) df_sales["last_updated_time"] = ( df_sales["last_updated"].astype("datetime64[ns]").dt.floor("s").dt.time -== == == = - df_sales["created_date"]=pd.to_datetime( - df_sales["created_at"], format="%Y-%m-%d") - df_sales["created_time"]=pd.to_datetime( - df_sales["created_at"], format="%H-%M-%S") - df_sales["last_updated_date"]=pd.to_datetime( - df_sales["last_updated"], format="%Y-%m-%d" - ) - df_sales["last_updated_time"]=pd.to_datetime( - df_sales["last_updated"], format="%H-%M-%S" ->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter) ) df_sales["agreed_delivery_date"]=pd.to_datetime( df_sales["agreed_delivery_date"], format="%Y-%m-%d" @@ -51,11 +38,28 @@ def create_fact_sales_order(dict_of_df): df_sales["agreed_payment_date"]=pd.to_datetime( df_sales["agreed_payment_date"], format="%Y-%m-%d" ) -<< << << < HEAD df_sales=df_sales.drop(labels=["created_at", "last_updated"], axis=1) -== == == = - df_sales.drop(labels=["created_at", "last_updated"], axis=1, inplace=True) ->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter) + + df_sales.reset_index(inplace=True) + return df_sales + + df_sales["created_date"] = df_sales["created_at"].astype("datetime64[ns]").dt.date + df_sales["created_time"] = ( + df_sales["created_at"].astype("datetime64[ns]").dt.floor("s").dt.time + ) + df_sales["last_updated_date"] = ( + df_sales["last_updated"].astype("datetime64[ns]").dt.date + ) + df_sales["last_updated_time"] = ( + df_sales["last_updated"].astype("datetime64[ns]").dt.floor("s").dt.time + ) + df_sales["agreed_delivery_date"] = pd.to_datetime( + df_sales["agreed_delivery_date"], format="%Y-%m-%d" + ) + df_sales["agreed_payment_date"] = pd.to_datetime( + df_sales["agreed_payment_date"], format="%Y-%m-%d" + ) + df_sales = df_sales.drop(labels=["created_at", "last_updated"], axis=1) df_sales.reset_index(inplace=True) return df_sales @@ -66,7 +70,6 @@ def create_fact_sales_order(dict_of_df): def create_fact_purchase_orders(dict_of_df): df_po=dict_of_df["purchase_order"] df_po.index.name="purchase_record_id" -<< << << < HEAD df_po["created_date"]=df_po["created_at"].astype("datetime64[ns]").dt.date df_po["created_time"]=( df_po["created_at"].astype("datetime64[ns]").dt.floor("s").dt.time @@ -75,17 +78,7 @@ def create_fact_purchase_orders(dict_of_df): "datetime64[ns]").dt.date df_po["last_updated_time"]=( df_po["last_updated"].astype("datetime64[ns]").dt.floor("s").dt.time -== == == = - df_po["created_date"]=pd.to_datetime( - df_po["created_at"], format="%Y-%m-%d") - df_po["created_time"]=pd.to_datetime( - df_po["created_at"], format="%H-%M-%S") - df_po["last_updated_date"]=pd.to_datetime( - df_po["last_updated"], format="%Y-%m-%d" - ) - df_po["last_updated_time"]=pd.to_datetime( - df_po["last_updated"], format="%H-%M-%S" ->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter) + ) df_po["agreed_delivery_date"]=pd.to_datetime( df_po["agreed_delivery_date"], format="%Y-%m-%d" @@ -93,11 +86,7 @@ def create_fact_purchase_orders(dict_of_df): df_po["agreed_payment_date"]=pd.to_datetime( df_po["agreed_payment_date"], format="%Y-%m-%d" ) -<< << << < HEAD df_po=df_po.drop(labels=["created_at", "last_updated"], axis=1) -== == == = - df_po.drop(labels=["created_at", "last_updated"], axis=1, inplace=True) ->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter) df_po.reset_index(inplace=True) return df_po @@ -108,7 +97,6 @@ def create_fact_purchase_orders(dict_of_df): def create_fact_payment(dict_of_df): df_payment=dict_of_df["payment"] df_payment.index.name="payment_record_id" -<< << << < HEAD df_payment["created_date"]=( df_payment["created_at"].astype("datetime64[ns]").dt.date ) @@ -121,29 +109,12 @@ def create_fact_payment(dict_of_df): df_payment["last_updated_time"]=( df_payment["last_updated"].astype( "datetime64[ns]").dt.floor("s").dt.time -== == == = - df_payment["created_date"]=pd.to_datetime( - df_payment["created_at"], format="%Y-%m-%d" - ) - df_payment["created_time"]=pd.to_datetime( - df_payment["created_at"], format="%H-%M-%S" - ) - df_payment["last_updated_date"]=pd.to_datetime( - df_payment["last_updated"], format="%Y-%m-%d" - ) - df_payment["last_updated_time"]=pd.to_datetime( - df_payment["last_updated"], format="%H-%M-%S" ->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter) ) df_payment["payment_date"]=pd.to_datetime( df_payment["payment_date"], format="%Y-%m-%d" ) -<< << << < HEAD df_payment=df_payment.drop(labels=["created_at", "last_updated"], axis=1) -== == == = - df_payment.drop( - labels=["created_at", "last_updated"], axis=1, inplace=True) ->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter) + df_payment.reset_index(inplace=True) return df_payment @@ -196,14 +167,10 @@ def create_dim_date(dict_of_df): create_fact_purchase_orders(dict_of_df), create_fact_sales_order(dict_of_df), ] - list_of_date_columns=[] + list_of_date_columns = [] for df in fact_dfs: - date_col_names=[ -<< << << < HEAD + date_col_names = [ col_name for col_name in list(df.columns) if "_date" in col_name -== == == = - col_name for col_name in list(df.columns) if "date" in col_name ->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter) ] for col in date_col_names: list_of_date_columns.append(df[col]) diff --git a/tests/test_dataframes.py b/tests/test_dataframes.py index 785a3fd..ff282eb 100644 --- a/tests/test_dataframes.py +++ b/tests/test_dataframes.py @@ -72,8 +72,10 @@ class TestCreateDimStaff: "email_address": ["Hello", "Bye"], "department_id": ["Hello", "Bye"], } + test_df = {"staff": pd.DataFrame( data=d), "department": pd.DataFrame(data=d2)} + result = create_dim_staff(test_df) expected_d = { "staff_id": ["Hello", "Bye"], @@ -92,6 +94,7 @@ class TestCreatePaymentType: def test_create_dim_payment_type_returns_correct_columns_and_values(self): d = {"payment_type_id": ["Hello", "Bye"], "payment_type_name": ["Hello", "Bye"]} + test_df = {"payment_type": pd.DataFrame(data=d)} result = create_dim_payment_type(test_df) expected_columns = ["payment_type_id", "payment_type_name"] @@ -190,6 +193,7 @@ class TestCreateDimDate: df_three = pd.DataFrame( data={"updated_date": dt(2022, 5, 17), "created_date": dt(2023, 5, 13)}, + index=[0], ) expected_df = pd.DataFrame( @@ -223,6 +227,7 @@ class TestCreateDimDate: expected_df, axis="columns").all(axis=None) + class TestCreateDimLocation: def test_returns_correct_columns_lo(self): dict_df = { @@ -230,6 +235,7 @@ class TestCreateDimLocation: data=[["some_time", "some_other_time", 1, "SE18 9QO"]], columns=["created_at", "last_updated", "address_id", "postal_code"], + ) } result = create_dim_location(dict_df) @@ -259,7 +265,6 @@ class TestCreateFactPayment: "payment": pd.DataFrame( data=[ [ - << << << < HEAD dt.strptime( "2022-11-03 14:20:49.962846", "%Y-%m-%d %H:%M:%S.%f" ), @@ -269,13 +274,6 @@ class TestCreateFactPayment: 1, "SE18 9QO", "2020-07-16", - == == === - dt(2020, 5, 17, 6, 15, 20), - dt(2020, 5, 20, 8, 19, 30), - 1, - "SE18 9QO", - "2020-7-16", - >>>>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter) ] ], columns=[ @@ -304,10 +302,6 @@ class TestCreateFactPayment: for col in expected_cols: -<< << << < HEAD + if "_date" or "_time" in col: assert result[col].dtype == "O" -== == == = -if "date" in col: - assert result[col].dtype == "datetime64[ns]" ->>>>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter) |
