diff options
| -rw-r--r-- | src/dataframes.py | 30 | ||||
| -rw-r--r-- | tests/test_fact_sales_order.py | 113 |
2 files changed, 82 insertions, 61 deletions
diff --git a/src/dataframes.py b/src/dataframes.py index 042c8aa..7d10aa7 100644 --- a/src/dataframes.py +++ b/src/dataframes.py @@ -81,28 +81,28 @@ def create_fact_payment(dict_of_df): ]] return fact_payment +#test passed def create_dim_transaction(dict_of_df): - df_transaction = dict_of_df["transaction"].drop(labels=['created_at', 'last_updated'], axis=1).set_index('transaction_id') - dim_transaction = df_transaction.loc[:, ["payment_type_id", "payment_type_name"]] - return dim_transaction + df_transaction = dict_of_df["transaction"].drop(labels=['created_at', 'last_updated'], axis=1) + return df_transaction -## dim_location from address --> drops 2 columns +#test passed def create_dim_location(dict_of_df): - df_loc = dict_of_df['address'].drop(labels=['created_at', 'last_updated'], axis=1).rename(columns={'address_id': 'location_id'}).set_index('location_id') + df_loc = dict_of_df['address'].drop(labels=['created_at', 'last_updated'], axis=1).rename(columns={'address_id': 'location_id'}) return df_loc -## dim_counterparty from address and counterparty + def create_dim_counterparty(dict_of_df): df_prefixed_address = dict_of_df['address'].add_prefix('counterparty_legal_', axis=1) df_cp = pd.merge(dict_of_df['counterparty'], df_prefixed_address, left_on="legal_address_id", - right_on="address_id", - how="outer").set_index('counterparty_id') + right_on="counterparty_legal_address_id", + how="outer") + df_cp.drop(columns=["legal_address_id","counterparty_legal_address_id"],inplace=True) return df_cp - -## dim_date from purchase_order +#test passed def create_dim_date(dict_of_df): fact_dfs = [create_fact_payment(dict_of_df), create_fact_purchase_orders(dict_of_df), create_fact_sales_order(dict_of_df)] date_col_names = [col_name for col_name in list(fact_dfs[0].columns) if 'date' in col_name] @@ -119,9 +119,10 @@ def create_dim_date(dict_of_df): df_date['day_of_week'] = df_date['date_id'].dt.dayofweek df_date['day_name'] = df_date['date_id'].dt.day_name() df_date['month_name'] = df_date['date_id'].dt.month_name() - df_date['quarter'] = df_date['date_id'].dt.quarter #By default, the DataFrame index is not included when uploading to RDS. We are not setting indexes to retain the column information - return + df_date['quarter'] = df_date['date_id'].dt.quarter + return df_date +#tests passed def scrape_currency_names(): response = requests.get('https://www.xe.com/currency/').content soup = BeautifulSoup(response,'html.parser') @@ -130,11 +131,12 @@ def scrape_currency_names(): df_cur = sr.str.split(pat=" - ",expand=True).rename({0:'currency_code',1:'currency_name'},axis=1) return df_cur +#tests passed def create_dim_currency(dict_of_df,names=scrape_currency_names()): df_cur = dict_of_df['currency'].drop(labels=['created_at', 'last_updated'], axis=1) - dim_cur = pd.merge(df_cur,names,left_on='currency_code',right_on='currency_code',how='inner').set_index('currency_id') - print(dim_cur) + dim_cur = pd.merge(df_cur,names,left_on='currency_code',right_on='currency_code',how='inner') return dim_cur + #tests passed def create_dim_payment_type(dict_of_df): df_payment_type = dict_of_df["payment_type"] diff --git a/tests/test_fact_sales_order.py b/tests/test_fact_sales_order.py index ca53faa..f0796eb 100644 --- a/tests/test_fact_sales_order.py +++ b/tests/test_fact_sales_order.py @@ -1,6 +1,7 @@ -from src.dataframes import create_dim_design, create_dim_staff, create_dim_payment_type, create_dim_counterparty, create_dim_currency +from src.dataframes import * import pandas as pd from unittest.mock import patch +from datetime import datetime as dt class TestCreateDimDesign: def test_dim_design_returns_dataframe(self): @@ -52,59 +53,77 @@ class TestCreatePaymentType: assert result.equals(expected_df) class TestCreateDimCounterparty: - def test_create_dim_counterparty_type_returns_correct_columns_and_values(self): - data_d = {"counterparty_id": ["Hello", "Bye"], + + def test_create_dim_counterparty_type_returns_correct_columns_and_object(self): + data_l = pd.DataFrame(data={"counterparty_id": ["Hello", "Bye"], "counterparty_legal_name": ["Hello", "Bye"], - "counterparty_legal_address_line_1": ["Hello", "Bye"], - } - data_a = {"address_id": - "address", - } - test_df = {"address": pd.DataFrame(data=data_a)} - test_df = {} + "commercial_contact": ["Hello", "Bye"], + "legal_address_id": ["bond street", "regent street"]}) + data_a = pd.DataFrame(data={"address_id":["bond street", "regent street"], + "postcode":[98365,93753]}) + test_df = {"address": data_a,"counterparty":data_l} result = create_dim_counterparty(test_df) - expected_columns = ["counterparty_id", - "counterparty_legal_name", - "counterparty_legal_address_line_1", - "counterparty_legal_address_line_2", - "counterparty_legal_district", - "counterparty_legal_city", - "counterparty_legal_postal_code", - "counterparty_legal_postal_code", - "counterparty_legal_phone_number"] - expected_d = {"counterparty_id": ["Hello", "Bye"], - "counterparty_legal_name": ["Hello", "Bye"], - "counterparty_legal_address_line_1": ["Hello", "Bye"], - "counterparty_legal_address_line_2": ["Hello", "Bye"], - "counterparty_legal_district": ["Hello", "Bye"], - "counterparty_legal_city": ["Hello", "Bye"], - "counterparty_legal_postal_code": ["Hello", "Bye"], - "counterparty_legal_postal_code": ["Hello", "Bye"], - "counterparty_legal_phone_number": ["Hello", "Bye"]} - expected_df = pd.DataFrame(data=expected_d) + expected_columns = ["counterparty_id", "counterparty_legal_name", + "commercial_contact", "counterparty_legal_postcode"] + print(data_l) + print(data_a) assert isinstance(result, pd.DataFrame) assert list(result.columns) == expected_columns - assert result.equals(expected_df) -# # figuring out how to mock currency scraper functiom -# class TestCreateDimCurrency: -# @patch("src.dataframes.scrape_currency_names") -# def test_dim_currency_returns_columns_and_values(self): -# d = {"currency_id": [1, 2, 3], "currency_code": ["USD", "EUR", "GBP"]} -# test_df = {"currency": pd.DataFrame(data=d)} -# result = create_dim_currency(test_df) -# expected_d = {"currency_id": [1, 2, 3], "currency_code": ["USD", "EUR", "GBP"], "currency_name": ["US Dollar", "Euro", "Pound"]} -# expected_df = pd.DataFrame(data=expected_d) -# expected_result = expected_df.copy() -# assert result.equals(expected_result) +class TestCreateDimCurrency: + + def test_dim_currency_returns_columns_and_values(self): + nones = [None,None,None] + d = {"currency_id": [1, 2, 3], "currency_code": ["USD", "EUR", "GBP"],"created_at":nones,"last_updated":nones} + test_df = {"currency": pd.DataFrame(data=d)} + scraper_output = pd.DataFrame({"currency_code":["RUS","USD","PHP","GBP","EUR"],"currency_name":["Rubble","US Dollar","Peso","Pound","Euro"]}) + result = create_dim_currency(test_df,names=scraper_output).sort_values(by="currency_code",axis=0) + expected_d = {"currency_id": [1, 2, 3], "currency_code": ["USD", "EUR", "GBP"], "currency_name": ["US Dollar", "Euro", "Pound"]} + expected_df = pd.DataFrame(data=expected_d).sort_values(by="currency_code",axis=0) + assert isinstance(result, pd.DataFrame) + assert result.equals(expected_df) -# def test_dim_currency_returns_dataframe(self): -# d = {"currency_id": [1, 2, 3], "currency_code": ["USD", "EUR", "GBP"]} -# test_df = {"currency": pd.DataFrame(data=d)} -# result = create_dim_currency(test_df) -# assert isinstance(result, pd.DataFrame) + def test_scrape_currency_names_returns_dataframe_with_correct_collumns(self): + result = scrape_currency_names() + assert isinstance(result,pd.DataFrame) + assert list(result.columns) == ['currency_code', 'currency_name'] + +class TestCreateDimDate: + + def test_returns_required_columns(self): + df_one = pd.DataFrame(data={'updated_date':dt(2020, 5, 17),'created_date':dt(2021, 5, 13),'not_dat':None},index=[0]) + df_two = pd.DataFrame(data={'updated_date':dt(2020, 5, 17),'created_date':dt(2021, 9, 13)},index=[0]) + df_three = pd.DataFrame(data={'updated_date':dt(2022, 5, 17),'created_date':dt(2023, 5, 13)},index=[0]) + expected_df = pd.DataFrame(data= + [[dt(2020,5,17),2020,5,17,6,'Sunday','May',2], + [dt(2021,5,13),2021,5,13,3,'Thursday','May',2], + [dt(2021,9,13),2021,9,13,0,'Monday','September',3], + [dt(2022,5,17),2022,5,17,1,'Tuesday','May',2], + [dt(2023,5,13),2023,5,13,5,'Saturday','May',2]], + columns=['date_id','year','month','day','day_of_week','day_name','month_name','quarter']) + with patch("src.dataframes.create_fact_payment") as mock_fp: + with patch("src.dataframes.create_fact_purchase_orders") as mock_fpo: + with patch("src.dataframes.create_fact_sales_order") as mock_fso: + mock_fp.return_value = df_one + mock_fpo.return_value = df_two + mock_fso.return_value = df_three + result = create_dim_date({'dum':0}) + result.reset_index(inplace=True,drop=True) + assert result.eq(expected_df, axis="columns").all(axis=None) - +class TestCreateDimLocation: + def test_returns_correct_columns_lo(self): + dict_df = {'address':pd.DataFrame(data=[['some_time','some_other_time',1,'SE18 9QO']], + columns=['created_at','last_updated','address_id','postal_code'])} + result = create_dim_location(dict_df) + assert list(result.columns) == ['location_id','postal_code'] + +class TestCreateDimTransaction: + def test_returns_correct_columns_tr(self): + dict_df = {'transaction':pd.DataFrame(data=[['some_time','some_other_time',1,'SE18 9QO']], + columns=['created_at','last_updated','transaction_id','some_other_id'])} + result = create_dim_transaction(dict_df) + assert list(result.columns) == ['transaction_id','some_other_id']
\ No newline at end of file |
