about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/dataframes.py 85
-rw-r--r-- tests/test_dataframes.py 20
2 files changed, 33 insertions, 72 deletions
diff --git a/src/dataframes.py b/src/dataframes.py
index 43facd6..ab32fff 100644
--- a/src/dataframes.py
+++ b/src/dataframes.py
@@ -20,9 +20,7 @@ import requests
def create_fact_sales_order(dict_of_df):
df_sales = dict_of_df["sales_order"]
df_sales.index.name = "sales_record_id"
-
-
-<< << << < HEAD
+
df_sales["created_date"] = df_sales["created_at"].astype(
"datetime64[ns]").dt.date
df_sales["created_time"] = (
@@ -33,17 +31,6 @@ def create_fact_sales_order(dict_of_df):
)
df_sales["last_updated_time"] = (
df_sales["last_updated"].astype("datetime64[ns]").dt.floor("s").dt.time
-== == == =
- df_sales["created_date"]=pd.to_datetime(
- df_sales["created_at"], format="%Y-%m-%d")
- df_sales["created_time"]=pd.to_datetime(
- df_sales["created_at"], format="%H-%M-%S")
- df_sales["last_updated_date"]=pd.to_datetime(
- df_sales["last_updated"], format="%Y-%m-%d"
- )
- df_sales["last_updated_time"]=pd.to_datetime(
- df_sales["last_updated"], format="%H-%M-%S"
->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter)
)
df_sales["agreed_delivery_date"]=pd.to_datetime(
df_sales["agreed_delivery_date"], format="%Y-%m-%d"
@@ -51,11 +38,28 @@ def create_fact_sales_order(dict_of_df):
df_sales["agreed_payment_date"]=pd.to_datetime(
df_sales["agreed_payment_date"], format="%Y-%m-%d"
)
-<< << << < HEAD
df_sales=df_sales.drop(labels=["created_at", "last_updated"], axis=1)
-== == == =
- df_sales.drop(labels=["created_at", "last_updated"], axis=1, inplace=True)
->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter)
+
+ df_sales.reset_index(inplace=True)
+ return df_sales
+
+ df_sales["created_date"] = df_sales["created_at"].astype("datetime64[ns]").dt.date
+ df_sales["created_time"] = (
+ df_sales["created_at"].astype("datetime64[ns]").dt.floor("s").dt.time
+ )
+ df_sales["last_updated_date"] = (
+ df_sales["last_updated"].astype("datetime64[ns]").dt.date
+ )
+ df_sales["last_updated_time"] = (
+ df_sales["last_updated"].astype("datetime64[ns]").dt.floor("s").dt.time
+ )
+ df_sales["agreed_delivery_date"] = pd.to_datetime(
+ df_sales["agreed_delivery_date"], format="%Y-%m-%d"
+ )
+ df_sales["agreed_payment_date"] = pd.to_datetime(
+ df_sales["agreed_payment_date"], format="%Y-%m-%d"
+ )
+ df_sales = df_sales.drop(labels=["created_at", "last_updated"], axis=1)
df_sales.reset_index(inplace=True)
return df_sales
@@ -66,7 +70,6 @@ def create_fact_sales_order(dict_of_df):
def create_fact_purchase_orders(dict_of_df):
df_po=dict_of_df["purchase_order"]
df_po.index.name="purchase_record_id"
-<< << << < HEAD
df_po["created_date"]=df_po["created_at"].astype("datetime64[ns]").dt.date
df_po["created_time"]=(
df_po["created_at"].astype("datetime64[ns]").dt.floor("s").dt.time
@@ -75,17 +78,7 @@ def create_fact_purchase_orders(dict_of_df):
"datetime64[ns]").dt.date
df_po["last_updated_time"]=(
df_po["last_updated"].astype("datetime64[ns]").dt.floor("s").dt.time
-== == == =
- df_po["created_date"]=pd.to_datetime(
- df_po["created_at"], format="%Y-%m-%d")
- df_po["created_time"]=pd.to_datetime(
- df_po["created_at"], format="%H-%M-%S")
- df_po["last_updated_date"]=pd.to_datetime(
- df_po["last_updated"], format="%Y-%m-%d"
- )
- df_po["last_updated_time"]=pd.to_datetime(
- df_po["last_updated"], format="%H-%M-%S"
->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter)
+
)
df_po["agreed_delivery_date"]=pd.to_datetime(
df_po["agreed_delivery_date"], format="%Y-%m-%d"
@@ -93,11 +86,7 @@ def create_fact_purchase_orders(dict_of_df):
df_po["agreed_payment_date"]=pd.to_datetime(
df_po["agreed_payment_date"], format="%Y-%m-%d"
)
-<< << << < HEAD
df_po=df_po.drop(labels=["created_at", "last_updated"], axis=1)
-== == == =
- df_po.drop(labels=["created_at", "last_updated"], axis=1, inplace=True)
->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter)
df_po.reset_index(inplace=True)
return df_po
@@ -108,7 +97,6 @@ def create_fact_purchase_orders(dict_of_df):
def create_fact_payment(dict_of_df):
df_payment=dict_of_df["payment"]
df_payment.index.name="payment_record_id"
-<< << << < HEAD
df_payment["created_date"]=(
df_payment["created_at"].astype("datetime64[ns]").dt.date
)
@@ -121,29 +109,12 @@ def create_fact_payment(dict_of_df):
df_payment["last_updated_time"]=(
df_payment["last_updated"].astype(
"datetime64[ns]").dt.floor("s").dt.time
-== == == =
- df_payment["created_date"]=pd.to_datetime(
- df_payment["created_at"], format="%Y-%m-%d"
- )
- df_payment["created_time"]=pd.to_datetime(
- df_payment["created_at"], format="%H-%M-%S"
- )
- df_payment["last_updated_date"]=pd.to_datetime(
- df_payment["last_updated"], format="%Y-%m-%d"
- )
- df_payment["last_updated_time"]=pd.to_datetime(
- df_payment["last_updated"], format="%H-%M-%S"
->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter)
)
df_payment["payment_date"]=pd.to_datetime(
df_payment["payment_date"], format="%Y-%m-%d"
)
-<< << << < HEAD
df_payment=df_payment.drop(labels=["created_at", "last_updated"], axis=1)
-== == == =
- df_payment.drop(
- labels=["created_at", "last_updated"], axis=1, inplace=True)
->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter)
+
df_payment.reset_index(inplace=True)
return df_payment
@@ -196,14 +167,10 @@ def create_dim_date(dict_of_df):
create_fact_purchase_orders(dict_of_df),
create_fact_sales_order(dict_of_df),
]
- list_of_date_columns=[]
+ list_of_date_columns = []
for df in fact_dfs:
- date_col_names=[
-<< << << < HEAD
+ date_col_names = [
col_name for col_name in list(df.columns) if "_date" in col_name
-== == == =
- col_name for col_name in list(df.columns) if "date" in col_name
->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter)
]
for col in date_col_names:
list_of_date_columns.append(df[col])
diff --git a/tests/test_dataframes.py b/tests/test_dataframes.py
index 785a3fd..ff282eb 100644
--- a/tests/test_dataframes.py
+++ b/tests/test_dataframes.py
@@ -72,8 +72,10 @@ class TestCreateDimStaff:
"email_address": ["Hello", "Bye"],
"department_id": ["Hello", "Bye"],
}
+
test_df = {"staff": pd.DataFrame(
data=d), "department": pd.DataFrame(data=d2)}
+
result = create_dim_staff(test_df)
expected_d = {
"staff_id": ["Hello", "Bye"],
@@ -92,6 +94,7 @@ class TestCreatePaymentType:
def test_create_dim_payment_type_returns_correct_columns_and_values(self):
d = {"payment_type_id": ["Hello", "Bye"],
"payment_type_name": ["Hello", "Bye"]}
+
test_df = {"payment_type": pd.DataFrame(data=d)}
result = create_dim_payment_type(test_df)
expected_columns = ["payment_type_id", "payment_type_name"]
@@ -190,6 +193,7 @@ class TestCreateDimDate:
df_three = pd.DataFrame(
data={"updated_date": dt(2022, 5, 17),
"created_date": dt(2023, 5, 13)},
+
index=[0],
)
expected_df = pd.DataFrame(
@@ -223,6 +227,7 @@ class TestCreateDimDate:
expected_df, axis="columns").all(axis=None)
+
class TestCreateDimLocation:
def test_returns_correct_columns_lo(self):
dict_df = {
@@ -230,6 +235,7 @@ class TestCreateDimLocation:
data=[["some_time", "some_other_time", 1, "SE18 9QO"]],
columns=["created_at", "last_updated",
"address_id", "postal_code"],
+
)
}
result = create_dim_location(dict_df)
@@ -259,7 +265,6 @@ class TestCreateFactPayment:
"payment": pd.DataFrame(
data=[
[
- << << << < HEAD
dt.strptime(
"2022-11-03 14:20:49.962846", "%Y-%m-%d %H:%M:%S.%f"
),
@@ -269,13 +274,6 @@ class TestCreateFactPayment:
1,
"SE18 9QO",
"2020-07-16",
- == == ===
- dt(2020, 5, 17, 6, 15, 20),
- dt(2020, 5, 20, 8, 19, 30),
- 1,
- "SE18 9QO",
- "2020-7-16",
- >>>>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter)
]
],
columns=[
@@ -304,10 +302,6 @@ class TestCreateFactPayment:
for col in expected_cols:
-<< << << < HEAD
+
if "_date" or "_time" in col:
assert result[col].dtype == "O"
-== == == =
-if "date" in col:
- assert result[col].dtype == "datetime64[ns]"
->>>>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter)
git.ajschof.me — hosted by ajschofield — powered by cgit