about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
authorAng Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local>2024-08-23 17:09:27 +0100
committerAng Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local>2024-08-23 17:09:27 +0100
commit821e241c925e682845e02e9609ba3a2c758966d8 (patch)
tree30f811c3d7dd240029cf3da926823e67b529af92
parenteeaaeb471f3410e5c655836253484a41e54ef71b (diff)
downloadde-project-bentley-821e241c925e682845e02e9609ba3a2c758966d8.tar.gz
de-project-bentley-821e241c925e682845e02e9609ba3a2c758966d8.zip
tests: add passing tests for the dim table transformations; the fact transformation functions are not yet tested
-rw-r--r--src/dataframes.py30
-rw-r--r--tests/test_fact_sales_order.py113
2 files changed, 82 insertions, 61 deletions
diff --git a/src/dataframes.py b/src/dataframes.py
index 042c8aa..7d10aa7 100644
--- a/src/dataframes.py
+++ b/src/dataframes.py
@@ -81,28 +81,28 @@ def create_fact_payment(dict_of_df):
]]
return fact_payment
+#test passed
def create_dim_transaction(dict_of_df):
- df_transaction = dict_of_df["transaction"].drop(labels=['created_at', 'last_updated'], axis=1).set_index('transaction_id')
- dim_transaction = df_transaction.loc[:, ["payment_type_id", "payment_type_name"]]
- return dim_transaction
+ df_transaction = dict_of_df["transaction"].drop(labels=['created_at', 'last_updated'], axis=1)
+ return df_transaction
-## dim_location from address --> drops 2 columns
+#test passed
def create_dim_location(dict_of_df):
- df_loc = dict_of_df['address'].drop(labels=['created_at', 'last_updated'], axis=1).rename(columns={'address_id': 'location_id'}).set_index('location_id')
+ df_loc = dict_of_df['address'].drop(labels=['created_at', 'last_updated'], axis=1).rename(columns={'address_id': 'location_id'})
return df_loc
-## dim_counterparty from address and counterparty
+
def create_dim_counterparty(dict_of_df):
df_prefixed_address = dict_of_df['address'].add_prefix('counterparty_legal_', axis=1)
df_cp = pd.merge(dict_of_df['counterparty'],
df_prefixed_address,
left_on="legal_address_id",
- right_on="address_id",
- how="outer").set_index('counterparty_id')
+ right_on="counterparty_legal_address_id",
+ how="outer")
+ df_cp.drop(columns=["legal_address_id","counterparty_legal_address_id"],inplace=True)
return df_cp
-
-## dim_date from purchase_order
+#test passed
def create_dim_date(dict_of_df):
fact_dfs = [create_fact_payment(dict_of_df), create_fact_purchase_orders(dict_of_df), create_fact_sales_order(dict_of_df)]
date_col_names = [col_name for col_name in list(fact_dfs[0].columns) if 'date' in col_name]
@@ -119,9 +119,10 @@ def create_dim_date(dict_of_df):
df_date['day_of_week'] = df_date['date_id'].dt.dayofweek
df_date['day_name'] = df_date['date_id'].dt.day_name()
df_date['month_name'] = df_date['date_id'].dt.month_name()
- df_date['quarter'] = df_date['date_id'].dt.quarter #By default, the DataFrame index is not included when uploading to RDS. We are not setting indexes to retain the column information
- return
+ df_date['quarter'] = df_date['date_id'].dt.quarter
+ return df_date
+#tests passed
def scrape_currency_names():
response = requests.get('https://www.xe.com/currency/').content
soup = BeautifulSoup(response,'html.parser')
@@ -130,11 +131,12 @@ def scrape_currency_names():
df_cur = sr.str.split(pat=" - ",expand=True).rename({0:'currency_code',1:'currency_name'},axis=1)
return df_cur
+#tests passed
def create_dim_currency(dict_of_df,names=scrape_currency_names()):
df_cur = dict_of_df['currency'].drop(labels=['created_at', 'last_updated'], axis=1)
- dim_cur = pd.merge(df_cur,names,left_on='currency_code',right_on='currency_code',how='inner').set_index('currency_id')
- print(dim_cur)
+ dim_cur = pd.merge(df_cur,names,left_on='currency_code',right_on='currency_code',how='inner')
return dim_cur
+
#tests passed
def create_dim_payment_type(dict_of_df):
df_payment_type = dict_of_df["payment_type"]
diff --git a/tests/test_fact_sales_order.py b/tests/test_fact_sales_order.py
index ca53faa..f0796eb 100644
--- a/tests/test_fact_sales_order.py
+++ b/tests/test_fact_sales_order.py
@@ -1,6 +1,7 @@
-from src.dataframes import create_dim_design, create_dim_staff, create_dim_payment_type, create_dim_counterparty, create_dim_currency
+from src.dataframes import *
import pandas as pd
from unittest.mock import patch
+from datetime import datetime as dt
class TestCreateDimDesign:
def test_dim_design_returns_dataframe(self):
@@ -52,59 +53,77 @@ class TestCreatePaymentType:
assert result.equals(expected_df)
class TestCreateDimCounterparty:
- def test_create_dim_counterparty_type_returns_correct_columns_and_values(self):
- data_d = {"counterparty_id": ["Hello", "Bye"],
+
+ def test_create_dim_counterparty_type_returns_correct_columns_and_object(self):
+ data_l = pd.DataFrame(data={"counterparty_id": ["Hello", "Bye"],
"counterparty_legal_name": ["Hello", "Bye"],
- "counterparty_legal_address_line_1": ["Hello", "Bye"],
- }
- data_a = {"address_id":
- "address",
- }
- test_df = {"address": pd.DataFrame(data=data_a)}
- test_df = {}
+ "commercial_contact": ["Hello", "Bye"],
+ "legal_address_id": ["bond street", "regent street"]})
+ data_a = pd.DataFrame(data={"address_id":["bond street", "regent street"],
+ "postcode":[98365,93753]})
+ test_df = {"address": data_a,"counterparty":data_l}
result = create_dim_counterparty(test_df)
- expected_columns = ["counterparty_id",
- "counterparty_legal_name",
- "counterparty_legal_address_line_1",
- "counterparty_legal_address_line_2",
- "counterparty_legal_district",
- "counterparty_legal_city",
- "counterparty_legal_postal_code",
- "counterparty_legal_postal_code",
- "counterparty_legal_phone_number"]
- expected_d = {"counterparty_id": ["Hello", "Bye"],
- "counterparty_legal_name": ["Hello", "Bye"],
- "counterparty_legal_address_line_1": ["Hello", "Bye"],
- "counterparty_legal_address_line_2": ["Hello", "Bye"],
- "counterparty_legal_district": ["Hello", "Bye"],
- "counterparty_legal_city": ["Hello", "Bye"],
- "counterparty_legal_postal_code": ["Hello", "Bye"],
- "counterparty_legal_postal_code": ["Hello", "Bye"],
- "counterparty_legal_phone_number": ["Hello", "Bye"]}
- expected_df = pd.DataFrame(data=expected_d)
+ expected_columns = ["counterparty_id", "counterparty_legal_name",
+ "commercial_contact", "counterparty_legal_postcode"]
+ print(data_l)
+ print(data_a)
assert isinstance(result, pd.DataFrame)
assert list(result.columns) == expected_columns
- assert result.equals(expected_df)
-# # figuring out how to mock currency scraper functiom
-# class TestCreateDimCurrency:
-# @patch("src.dataframes.scrape_currency_names")
-# def test_dim_currency_returns_columns_and_values(self):
-# d = {"currency_id": [1, 2, 3], "currency_code": ["USD", "EUR", "GBP"]}
-# test_df = {"currency": pd.DataFrame(data=d)}
-# result = create_dim_currency(test_df)
-# expected_d = {"currency_id": [1, 2, 3], "currency_code": ["USD", "EUR", "GBP"], "currency_name": ["US Dollar", "Euro", "Pound"]}
-# expected_df = pd.DataFrame(data=expected_d)
-# expected_result = expected_df.copy()
-# assert result.equals(expected_result)
+class TestCreateDimCurrency:
+
+ def test_dim_currency_returns_columns_and_values(self):
+ nones = [None,None,None]
+ d = {"currency_id": [1, 2, 3], "currency_code": ["USD", "EUR", "GBP"],"created_at":nones,"last_updated":nones}
+ test_df = {"currency": pd.DataFrame(data=d)}
+ scraper_output = pd.DataFrame({"currency_code":["RUS","USD","PHP","GBP","EUR"],"currency_name":["Rubble","US Dollar","Peso","Pound","Euro"]})
+ result = create_dim_currency(test_df,names=scraper_output).sort_values(by="currency_code",axis=0)
+ expected_d = {"currency_id": [1, 2, 3], "currency_code": ["USD", "EUR", "GBP"], "currency_name": ["US Dollar", "Euro", "Pound"]}
+ expected_df = pd.DataFrame(data=expected_d).sort_values(by="currency_code",axis=0)
+ assert isinstance(result, pd.DataFrame)
+ assert result.equals(expected_df)
-# def test_dim_currency_returns_dataframe(self):
-# d = {"currency_id": [1, 2, 3], "currency_code": ["USD", "EUR", "GBP"]}
-# test_df = {"currency": pd.DataFrame(data=d)}
-# result = create_dim_currency(test_df)
-# assert isinstance(result, pd.DataFrame)
+ def test_scrape_currency_names_returns_dataframe_with_correct_collumns(self):
+ result = scrape_currency_names()
+ assert isinstance(result,pd.DataFrame)
+ assert list(result.columns) == ['currency_code', 'currency_name']
+
+class TestCreateDimDate:
+
+ def test_returns_required_columns(self):
+ df_one = pd.DataFrame(data={'updated_date':dt(2020, 5, 17),'created_date':dt(2021, 5, 13),'not_dat':None},index=[0])
+ df_two = pd.DataFrame(data={'updated_date':dt(2020, 5, 17),'created_date':dt(2021, 9, 13)},index=[0])
+ df_three = pd.DataFrame(data={'updated_date':dt(2022, 5, 17),'created_date':dt(2023, 5, 13)},index=[0])
+ expected_df = pd.DataFrame(data=
+ [[dt(2020,5,17),2020,5,17,6,'Sunday','May',2],
+ [dt(2021,5,13),2021,5,13,3,'Thursday','May',2],
+ [dt(2021,9,13),2021,9,13,0,'Monday','September',3],
+ [dt(2022,5,17),2022,5,17,1,'Tuesday','May',2],
+ [dt(2023,5,13),2023,5,13,5,'Saturday','May',2]],
+ columns=['date_id','year','month','day','day_of_week','day_name','month_name','quarter'])
+ with patch("src.dataframes.create_fact_payment") as mock_fp:
+ with patch("src.dataframes.create_fact_purchase_orders") as mock_fpo:
+ with patch("src.dataframes.create_fact_sales_order") as mock_fso:
+ mock_fp.return_value = df_one
+ mock_fpo.return_value = df_two
+ mock_fso.return_value = df_three
+ result = create_dim_date({'dum':0})
+ result.reset_index(inplace=True,drop=True)
+ assert result.eq(expected_df, axis="columns").all(axis=None)
-
+class TestCreateDimLocation:
+ def test_returns_correct_columns_lo(self):
+ dict_df = {'address':pd.DataFrame(data=[['some_time','some_other_time',1,'SE18 9QO']],
+ columns=['created_at','last_updated','address_id','postal_code'])}
+ result = create_dim_location(dict_df)
+ assert list(result.columns) == ['location_id','postal_code']
+
+class TestCreateDimTransaction:
+ def test_returns_correct_columns_tr(self):
+ dict_df = {'transaction':pd.DataFrame(data=[['some_time','some_other_time',1,'SE18 9QO']],
+ columns=['created_at','last_updated','transaction_id','some_other_id'])}
+ result = create_dim_transaction(dict_df)
+ assert list(result.columns) == ['transaction_id','some_other_id']
\ No newline at end of file
git.ajschof.me — hosted by ajschofield — powered by cgit