diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/fact-sales-order.py | 48 | ||||
| -rw-r--r-- | src/transform_lambda.py | 37 |
2 files changed, 69 insertions, 16 deletions
diff --git a/src/fact-sales-order.py b/src/fact-sales-order.py index a143889..30c958f 100644 --- a/src/fact-sales-order.py +++ b/src/fact-sales-order.py @@ -11,7 +11,7 @@ df_address = dict_of_df[address] df_staff = dict_of_df[staff] df_department = dict_of_df[department] df_counterparty = dict_of_df[counterparty] - +df_sales = dict_of_df[sales] # creates the dim_design dataframe dim_design = df_design["design_id", "design_name", "file_name", "file_location"] @@ -27,28 +27,46 @@ currency_names = pd.DataFrame(data=d) join_currency = pd.merge(df_currency, currency_names, on="currency_name", how="outer") dim_currency = join_currency["currency_id", "currency_code", "currency_name"] -# creates the dim_location dataframe -# need to change address id to location id -"dim_location dataframe: (location_id, address_line_1, address_line_2, district, city, postal code, country, phone)" -dim_location = df_address["address_id", "address_line_1", "address_line_2", "district", "city", "postal_code" "country", "phone"] - - - +# Using .map to add currency_name column and link it to the currency code +# dim_currency = df_currency["currency_id", "currency_code"] +# mappings = { +# "GBP": "Pound", +# "USD": "US Dollar", +# "EUR": "Euro" +# } +# dim_currency["currency_name"] = dim_currency["currency_code"].map(mappings) +# creates the dim_location dataframe +# need to change address id to location id +"dim_location dataframe: (location_id, address_line_1, address_line_2, district, city, postal code, country, phone)" +df_address.rename(columns={"address_id": "location_id"}) +dim_location = df_address["location_id", "address_line_1", "address_line_2", "district", "city", "postal_code" "country", "phone"] +# creates the dim_counterparty dataframe +counterparty_address = pd.merge(df_counterparty, df_address, left_on="legal_address_id", right_on='address_id', how="outer") +counterparty_address.rename(columns={"address_line_1": "counterparty_legal_address_line_1", "address_line_2": "counterparty_legal_address_line_2", + "district": "counterparty_legal_district", "city": "counterparty_legal_city", "postal_code": "counterparty_postal_code", + "country": "counterparty_legal_country", "phone": "counterparty_legal_phone_number"}) +dim_counterparty = df_counterparty["counterparty_id", "counterparty_legal_name", "counterparty_legal_address_line_1", + "counterparty_legal_address_line_2", "counterparty_legal_district", "counterpart_legal_city", + "counterparty_legal_postal_code", "counterparty_legal_country", "counterparty_legal_phone_number"] -# creates the dim_counterparty dataframe -# counterparty_address = pd.merge(df_counterparty, df_address, left_on="legal_address_id", right_on='address_id', how="outer") +# creates the dim_date dataframe +df_sales = df_sales["agreed_delivery_date"] +df_sales["agreed_delivery_date"] = pd.to_datetime["agreed_delivery_date"] +df_sales["year"] = df_sales["agreed_delivery_date"].dt.year +df_sales["month"] = df_sales["agreed_delivery_date"].dt.month +df_sales["day"] = df_sales["agreed_delivery_date"].dt.day +df_sales["day_of_week"] = df_sales["agreed_delivery_date"].dt.dayofweek +df_sales["day_name"] = df_sales["agreed_delivery_date"].dt.day_name() +df_sales["month_name"] = df_sales["agreed_delivery_date"].dt.month_name() +df_sales["quarter"] = df_sales["agreed_delivery_date"].dt.quarter() -# dim_counterparty = df_counterparty["counterparty_id", "counterparty_legal_name", "counterparty_legal_address_line_1", -# "counterparty_legal_address_line_2", "counterparty_legal_district", "counterpart_legal_city", -# "counterparty_legal_postal_code", "counterparty_legal_country", "counterparty_legal_phone_number"] +dim_date = ["date_id", "year", "month", "day", "day_of_week", "day_name", "month_name", "quarter"] #series.dt.quarter() # TO DO: -# dim_location -# dim_date # fact_sales_order
\ No newline at end of file diff --git a/src/transform_lambda.py b/src/transform_lambda.py index c6a8e60..9238180 100644 --- a/src/transform_lambda.py +++ b/src/transform_lambda.py @@ -1,2 +1,37 @@ -def lambda_handler(): +import json +import boto3 +import re +import io +from io import StringIO +import pandas as pd + + +def lambda_handler(event, context): pass + + +tables = [ + "sales_order", + "transaction", + "payment", + "counterparty", + "address", + "staff", + "purchase_order", + "department", + "currency", + "design", + "payment_type", +] + + +def read_from_s3_subfolder_to_df(tables, bucket, client=boto3.client("s3")): + table_dfs = {} + for table in tables: + response = client.list_objects_v2(Bucket=bucket, Prefix=table) + list_of_keys = [ + "s3://" + bucket + "/" + object["Key"] for object in response["Contents"] + ] + list_of_df = [pd.read_csv(key) for key in list_of_keys] + table_dfs[table] = pd.concat(list_of_df) + return table_dfs |
