aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/dataframes.py111
-rw-r--r--tests/test_dataframes.py (renamed from tests/test_fact_sales_order.py)41
2 files changed, 95 insertions, 57 deletions
diff --git a/src/dataframes.py b/src/dataframes.py
index ab53063..e60123a 100644
--- a/src/dataframes.py
+++ b/src/dataframes.py
@@ -16,85 +16,78 @@ import requests
# dim_counterparty
+# no test, same as fact_payment
def create_fact_sales_order(dict_of_df):
df_sales = dict_of_df["sales_order"]
df_sales.index.name = "sales_record_id"
- df_sales["created_date"] = pd.to_datetime(df_sales["created_at"]).dt.date
- df_sales["created_time"] = pd.to_datetime(df_sales["created_at"]).dt.time
- df_sales["last_updated_date"] = pd.to_datetime(df_sales["last_updated"]).dt.date
- df_sales["last_updated_time"] = pd.to_datetime(df_sales["last_updated"]).dt.time
- fact_sales_order = df_sales.loc[
- :,
- [
- "sales_record_id",
- "sales_order_id",
- "created_date",
- "created_time",
- "last_updated_date",
- "last_updated_time",
- "sales_staff_id",
- "counterparty_id",
- "units_sold",
- "unit_price",
- "currency_id",
- "design_id",
- "agreed_payment_date",
- "agreed_delivery_date",
- "agreed_delivery_location_id",
- ],
- ]
- return fact_sales_order
+ df_sales["created_date"] = pd.to_datetime(df_sales["created_at"], format="%Y-%m-%d")
+ df_sales["created_time"] = pd.to_datetime(df_sales["created_at"], format="%H-%M-%S")
+ df_sales["last_updated_date"] = pd.to_datetime(
+ df_sales["last_updated"], format="%Y-%m-%d"
+ )
+ df_sales["last_updated_time"] = pd.to_datetime(
+ df_sales["last_updated"], format="%H-%M-%S"
+ )
+ df_sales["agreed_delivery_date"] = pd.to_datetime(
+ df_sales["agreed_delivery_date"], format="%Y-%m-%d"
+ )
+ df_sales["agreed_payment_date"] = pd.to_datetime(
+ df_sales["agreed_payment_date"], format="%Y-%m-%d"
+ )
+ df_sales.drop(labels=["created_at", "last_updated"], axis=1, inplace=True)
+ df_sales.reset_index(inplace=True)
+ return df_sales
-# fact_purchase_order from purchase_order
+# no test, same as fact_payment
def create_fact_purchase_orders(dict_of_df):
df_po = dict_of_df["purchase_order"]
df_po.index.name = "purchase_record_id"
- df_po["created_date"] = df_po["created_at"].date()
- df_po["created_time"] = df_po["created_at"].dt.time
- df_po["last_updated_date"] = df_po["last_updated_at"].date()
- df_po["last_updated_time"] = df_po["last_updated_at"].dt.time
+ df_po["created_date"] = pd.to_datetime(df_po["created_at"], format="%Y-%m-%d")
+ df_po["created_time"] = pd.to_datetime(df_po["created_at"], format="%H-%M-%S")
+ df_po["last_updated_date"] = pd.to_datetime(
+ df_po["last_updated"], format="%Y-%m-%d"
+ )
+ df_po["last_updated_time"] = pd.to_datetime(
+ df_po["last_updated"], format="%H-%M-%S"
+ )
df_po["agreed_delivery_date"] = pd.to_datetime(
df_po["agreed_delivery_date"], format="%Y-%m-%d"
)
df_po["agreed_payment_date"] = pd.to_datetime(
df_po["agreed_payment_date"], format="%Y-%m-%d"
)
- df_po.drop(labels=["created_at", "last_updated_at"], axis=1, inplace=True)
+ df_po.drop(labels=["created_at", "last_updated"], axis=1, inplace=True)
+ df_po.reset_index(inplace=True)
return df_po
+# test passed
+
+
def create_fact_payment(dict_of_df):
df_payment = dict_of_df["payment"]
df_payment.index.name = "payment_record_id"
- df_payment["created_date"] = df_payment["created_at"].date()
- df_payment["created_time"] = df_payment["created_at"].time
- df_payment["last_updated_date"] = df_payment["last_updated"].date()
- df_payment["last_updated_time"] = df_payment["last_updated"].time
+ df_payment["created_date"] = pd.to_datetime(
+ df_payment["created_at"], format="%Y-%m-%d"
+ )
+ df_payment["created_time"] = pd.to_datetime(
+ df_payment["created_at"], format="%H-%M-%S"
+ )
+ df_payment["last_updated_date"] = pd.to_datetime(
+ df_payment["last_updated"], format="%Y-%m-%d"
+ )
+ df_payment["last_updated_time"] = pd.to_datetime(
+ df_payment["last_updated"], format="%H-%M-%S"
+ )
df_payment["payment_date"] = pd.to_datetime(
df_payment["payment_date"], format="%Y-%m-%d"
)
- fact_payment = df_payment.loc[
- :,
- [
- "payment_record_id",
- "payment_id",
- "created_date",
- "created_time",
- "last_updated_date",
- "last_updated_time",
- "transaction_id",
- "counterparty_id",
- "payment_amount",
- "currency_id",
- "payment_type_id",
- "paid",
- "payment_date",
- ],
- ]
- return fact_payment
+ df_payment.drop(labels=["created_at", "last_updated"], axis=1, inplace=True)
+ df_payment.reset_index(inplace=True)
+ return df_payment
# test passed
@@ -108,6 +101,8 @@ def create_dim_transaction(dict_of_df):
# test passed
+
+
def create_dim_location(dict_of_df):
df_loc = (
dict_of_df["address"]
@@ -143,11 +138,11 @@ def create_dim_date(dict_of_df):
create_fact_purchase_orders(dict_of_df),
create_fact_sales_order(dict_of_df),
]
- date_col_names = [
- col_name for col_name in list(fact_dfs[0].columns) if "date" in col_name
- ]
list_of_date_columns = []
for df in fact_dfs:
+ date_col_names = [
+ col_name for col_name in list(df.columns) if "date" in col_name
+ ]
for col in date_col_names:
list_of_date_columns.append(df[col])
sr_date = pd.array(pd.concat(list_of_date_columns), dtype="datetime64[ns]")
@@ -164,6 +159,8 @@ def create_dim_date(dict_of_df):
# tests passed
+
+
def scrape_currency_names():
response = requests.get("https://www.xe.com/currency/").content
soup = BeautifulSoup(response, "html.parser")
diff --git a/tests/test_fact_sales_order.py b/tests/test_dataframes.py
index a245379..584ab27 100644
--- a/tests/test_fact_sales_order.py
+++ b/tests/test_dataframes.py
@@ -244,3 +244,44 @@ class TestCreateDimTransaction:
}
result = create_dim_transaction(dict_df)
assert list(result.columns) == ["transaction_id", "some_other_id"]
+
+
+class TestCreateFactPayment:
+ def test_returns_correct_columns_payment(self):
+ dict_df = {
+ "payment": pd.DataFrame(
+ data=[
+ [
+ dt(2020, 5, 17, 6, 15, 20),
+ dt(2020, 5, 20, 8, 19, 30),
+ 1,
+ "SE18 9QO",
+ "2020-7-16",
+ ]
+ ],
+ columns=[
+ "created_at",
+ "last_updated",
+ "payment_id",
+ "some_other_id",
+ "payment_date",
+ ],
+ )
+ }
+ expected_cols = [
+ "payment_record_id",
+ "created_date",
+ "created_time",
+ "last_updated_date",
+ "last_updated_time",
+ "payment_date",
+ "payment_id",
+ "some_other_id",
+ ]
+ result = create_fact_payment(dict_df)
+ assert isinstance(result, pd.DataFrame)
+ for col in list(result.columns):
+ assert col in expected_cols
+ for col in expected_cols:
+ if "date" in col:
+ assert result[col].dtype == "datetime64[ns]"
git.ajschof.me — hosted by ajschofield — powered by cgit