aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/fact-sales-order.py48
-rw-r--r--src/transform_lambda.py37
2 files changed, 69 insertions, 16 deletions
diff --git a/src/fact-sales-order.py b/src/fact-sales-order.py
index a143889..30c958f 100644
--- a/src/fact-sales-order.py
+++ b/src/fact-sales-order.py
@@ -11,7 +11,7 @@ df_address = dict_of_df[address]
df_staff = dict_of_df[staff]
df_department = dict_of_df[department]
df_counterparty = dict_of_df[counterparty]
-
+df_sales = dict_of_df[sales]
# creates the dim_design dataframe
dim_design = df_design["design_id", "design_name", "file_name", "file_location"]
@@ -27,28 +27,46 @@ currency_names = pd.DataFrame(data=d)
join_currency = pd.merge(df_currency, currency_names, on="currency_name", how="outer")
dim_currency = join_currency["currency_id", "currency_code", "currency_name"]
-# creates the dim_location dataframe
-# need to change address id to location id
-"dim_location dataframe: (location_id, address_line_1, address_line_2, district, city, postal code, country, phone)"
-dim_location = df_address["address_id", "address_line_1", "address_line_2", "district", "city", "postal_code" "country", "phone"]
-
-
-
+# Using .map to add currency_name column and link it to the currency code
+# dim_currency = df_currency["currency_id", "currency_code"]
+# mappings = {
+# "GBP": "Pound",
+# "USD": "US Dollar",
+# "EUR": "Euro"
+# }
+# dim_currency["currency_name"] = dim_currency["currency_code"].map(mappings)
+# creates the dim_location dataframe
+# need to change address id to location id
+"dim_location dataframe: (location_id, address_line_1, address_line_2, district, city, postal code, country, phone)"
+df_address.rename(columns={"address_id": "location_id"})
+dim_location = df_address["location_id", "address_line_1", "address_line_2", "district", "city", "postal_code" "country", "phone"]
+# creates the dim_counterparty dataframe
+counterparty_address = pd.merge(df_counterparty, df_address, left_on="legal_address_id", right_on='address_id', how="outer")
+counterparty_address.rename(columns={"address_line_1": "counterparty_legal_address_line_1", "address_line_2": "counterparty_legal_address_line_2",
+ "district": "counterparty_legal_district", "city": "counterparty_legal_city", "postal_code": "counterparty_postal_code",
+ "country": "counterparty_legal_country", "phone": "counterparty_legal_phone_number"})
+dim_counterparty = df_counterparty["counterparty_id", "counterparty_legal_name", "counterparty_legal_address_line_1",
+ "counterparty_legal_address_line_2", "counterparty_legal_district", "counterpart_legal_city",
+ "counterparty_legal_postal_code", "counterparty_legal_country", "counterparty_legal_phone_number"]
-# creates the dim_counterparty dataframe
-# counterparty_address = pd.merge(df_counterparty, df_address, left_on="legal_address_id", right_on='address_id', how="outer")
+# creates the dim_date dataframe
+df_sales = df_sales["agreed_delivery_date"]
+df_sales["agreed_delivery_date"] = pd.to_datetime["agreed_delivery_date"]
+df_sales["year"] = df_sales["agreed_delivery_date"].dt.year
+df_sales["month"] = df_sales["agreed_delivery_date"].dt.month
+df_sales["day"] = df_sales["agreed_delivery_date"].dt.day
+df_sales["day_of_week"] = df_sales["agreed_delivery_date"].dt.dayofweek
+df_sales["day_name"] = df_sales["agreed_delivery_date"].dt.day_name()
+df_sales["month_name"] = df_sales["agreed_delivery_date"].dt.month_name()
+df_sales["quarter"] = df_sales["agreed_delivery_date"].dt.quarter()
-# dim_counterparty = df_counterparty["counterparty_id", "counterparty_legal_name", "counterparty_legal_address_line_1",
-# "counterparty_legal_address_line_2", "counterparty_legal_district", "counterpart_legal_city",
-# "counterparty_legal_postal_code", "counterparty_legal_country", "counterparty_legal_phone_number"]
+dim_date = ["date_id", "year", "month", "day", "day_of_week", "day_name", "month_name", "quarter"] #series.dt.quarter()
# TO DO:
-# dim_location
-# dim_date
# fact_sales_order \ No newline at end of file
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index c6a8e60..9238180 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,2 +1,37 @@
-def lambda_handler():
+import json
+import boto3
+import re
+import io
+from io import StringIO
+import pandas as pd
+
+
+def lambda_handler(event, context):
pass
+
+
+tables = [
+ "sales_order",
+ "transaction",
+ "payment",
+ "counterparty",
+ "address",
+ "staff",
+ "purchase_order",
+ "department",
+ "currency",
+ "design",
+ "payment_type",
+]
+
+
+def read_from_s3_subfolder_to_df(tables, bucket, client=boto3.client("s3")):
+ table_dfs = {}
+ for table in tables:
+ response = client.list_objects_v2(Bucket=bucket, Prefix=table)
+ list_of_keys = [
+ "s3://" + bucket + "/" + object["Key"] for object in response["Contents"]
+ ]
+ list_of_df = [pd.read_csv(key) for key in list_of_keys]
+ table_dfs[table] = pd.concat(list_of_df)
+ return table_dfs
git.ajschof.me — hosted by ajschofield — powered by cgit