diff options
| -rw-r--r-- | src/fact-purchase-table.py | 34 | ||||
| -rw-r--r-- | src/fact-sales-order.py | 29 | ||||
| -rw-r--r-- | src/transform_lambda.py | 4 |
3 files changed, 58 insertions, 9 deletions
diff --git a/src/fact-purchase-table.py b/src/fact-purchase-table.py new file mode 100644 index 0000000..53c0148 --- /dev/null +++ b/src/fact-purchase-table.py @@ -0,0 +1,34 @@ +from src.transform_lambda import read_from_s3_subfolder_to_df, tables +from src.extract_lambda import extract_bucket +import json +import boto3 +import re +import pandas as pd + + +dict_of_df = read_from_s3_subfolder_to_df(tables, extract_bucket(), client=boto3.client("s3")) + + +# iterates through each dataframe in the list of dataframes and assigns them to a variable +df_staff = dict_of_df['staff'] ##no change +df_currency = dict_of_df['currency'] ##scraping API +df_counterparty = dict_of_df['counterparty'] +df_address = dict_of_df['address'] +df_department = dict_of_df['department'] +df_purchase_order = dict_of_df['purchase_order'] + +## dim_staff table is the same across the schemas (no change) + +## dim_counterparty table + +## dim_location df_currency --> drops 2 columns +dim_location = df_address.drop(labels=['created_at', 'last_updated'], axis=1).rename(columns={'address_id': 'location_id'}) + +## dim_counterparty +df_prefixed_address = df_address.add_prefix('counterparty_legal_', axis=1) +pd.merge(df_counterparty, + df_prefixed_address, + left_on="legal_address_id", + right_on="address_id", + how="outer") + diff --git a/src/fact-sales-order.py b/src/fact-sales-order.py index ef18f02..870f660 100644 --- a/src/fact-sales-order.py +++ b/src/fact-sales-order.py @@ -1,7 +1,8 @@ import pandas as pd from src.transform_lambda import get_dataframes -dict_of_df = get_dataframes() # {"design": "design dataframe", "address": "address dataframe", ....} +# {"design": "design dataframe", "address": "address dataframe", ....} +dict_of_df = get_dataframes() # iterates through each dataframe in the list of dataframes and assigns them to a variable @@ -33,16 +34,30 @@ dim_currency["currency_name"] = dim_currency["currency_code"].map(mappings) # creates the dim_location dataframe -# need to change address id to location id +# need to change address id to location id "dim_location dataframe: (location_id, address_line_1, address_line_2, district, city, postal code, country, phone)" df_address.rename(columns={"address_id": "location_id"}) dim_location = df_address.loc[:, "location_id", "address_line_1", "address_line_2", "district", "city", "postal_code" "country", "phone"] # creates the dim_counterparty dataframe -counterparty_address = pd.merge(df_counterparty, df_address, left_on="legal_address_id", right_on='address_id', how="outer") -counterparty_address.rename(columns={"address_line_1": "counterparty_legal_address_line_1", "address_line_2": "counterparty_legal_address_line_2", - "district": "counterparty_legal_district", "city": "counterparty_legal_city", "postal_code": "counterparty_postal_code", - "country": "counterparty_legal_country", "phone": "counterparty_legal_phone_number"}) +counterparty_address = pd.merge( + df_counterparty, + df_address, + left_on="legal_address_id", + right_on="address_id", + how="outer" +) +counterparty_address.rename( + columns={ + "address_line_1": "counterparty_legal_address_line_1", + "address_line_2": "counterparty_legal_address_line_2", + "district": "counterparty_legal_district", + "city": "counterparty_legal_city", + "postal_code": "counterparty_postal_code", + "country": "counterparty_legal_country", + "phone": "counterparty_legal_phone_number", + } +) dim_counterparty = df_counterparty.loc[:, "counterparty_id", "counterparty_legal_name", "counterparty_legal_address_line_1", "counterparty_legal_address_line_2", "counterparty_legal_district", "counterpart_legal_city", @@ -68,4 +83,4 @@ dim_date = ["date_id", "year", "month", "day", "day_of_week", "day_name", "month # TO DO: -# fact_sales_order
\ No newline at end of file +# fact_sales_order diff --git a/src/transform_lambda.py b/src/transform_lambda.py index 9238180..920a24f 100644 --- a/src/transform_lambda.py +++ b/src/transform_lambda.py @@ -1,8 +1,6 @@ import json import boto3 import re -import io -from io import StringIO import pandas as pd @@ -35,3 +33,5 @@ def read_from_s3_subfolder_to_df(tables, bucket, client=boto3.client("s3")): list_of_df = [pd.read_csv(key) for key in list_of_keys] table_dfs[table] = pd.concat(list_of_df) return table_dfs + + |
