aboutsummaryrefslogtreecommitdiffstats
path: root/src/dataframes.py
diff options
context:
space:
mode:
author Ellie <167526066+ellsymonds@users.noreply.github.com> 2024-08-28 09:36:20 +0100
committer GitHub <noreply@github.com> 2024-08-28 09:36:20 +0100
commit d0b0fa9ff13d0739014cb8a42887f2d6fe11ae3f (patch)
tree 7f743a3f070f453d2f11b8a5103b0866e169d1af /src/dataframes.py
parent 95935534931b5ff6e617ba74c86cb7a6718128e4 (diff)
parent 572617d1c33646f2c58fad0c2859835542b2829f (diff)
download de-project-bentley-d0b0fa9ff13d0739014cb8a42887f2d6fe11ae3f.tar.gz
de-project-bentley-d0b0fa9ff13d0739014cb8a42887f2d6fe11ae3f.zip
Merge branch 'development' into feature/load-lambda-tests
Diffstat (limited to 'src/dataframes.py')
-rw-r--r-- src/dataframes.py 85
1 files changed, 26 insertions, 59 deletions
diff --git a/src/dataframes.py b/src/dataframes.py
index 43facd6..ab32fff 100644
--- a/src/dataframes.py
+++ b/src/dataframes.py
@@ -20,9 +20,7 @@ import requests
def create_fact_sales_order(dict_of_df):
df_sales = dict_of_df["sales_order"]
df_sales.index.name = "sales_record_id"
-
-
-<< << << < HEAD
+
df_sales["created_date"] = df_sales["created_at"].astype(
"datetime64[ns]").dt.date
df_sales["created_time"] = (
@@ -33,17 +31,6 @@ def create_fact_sales_order(dict_of_df):
)
df_sales["last_updated_time"] = (
df_sales["last_updated"].astype("datetime64[ns]").dt.floor("s").dt.time
-== == == =
- df_sales["created_date"]=pd.to_datetime(
- df_sales["created_at"], format="%Y-%m-%d")
- df_sales["created_time"]=pd.to_datetime(
- df_sales["created_at"], format="%H-%M-%S")
- df_sales["last_updated_date"]=pd.to_datetime(
- df_sales["last_updated"], format="%Y-%m-%d"
- )
- df_sales["last_updated_time"]=pd.to_datetime(
- df_sales["last_updated"], format="%H-%M-%S"
->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter)
)
df_sales["agreed_delivery_date"]=pd.to_datetime(
df_sales["agreed_delivery_date"], format="%Y-%m-%d"
@@ -51,11 +38,28 @@ def create_fact_sales_order(dict_of_df):
df_sales["agreed_payment_date"]=pd.to_datetime(
df_sales["agreed_payment_date"], format="%Y-%m-%d"
)
-<< << << < HEAD
df_sales=df_sales.drop(labels=["created_at", "last_updated"], axis=1)
-== == == =
- df_sales.drop(labels=["created_at", "last_updated"], axis=1, inplace=True)
->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter)
+
+ df_sales.reset_index(inplace=True)
+ return df_sales
+
+ df_sales["created_date"] = df_sales["created_at"].astype("datetime64[ns]").dt.date
+ df_sales["created_time"] = (
+ df_sales["created_at"].astype("datetime64[ns]").dt.floor("s").dt.time
+ )
+ df_sales["last_updated_date"] = (
+ df_sales["last_updated"].astype("datetime64[ns]").dt.date
+ )
+ df_sales["last_updated_time"] = (
+ df_sales["last_updated"].astype("datetime64[ns]").dt.floor("s").dt.time
+ )
+ df_sales["agreed_delivery_date"] = pd.to_datetime(
+ df_sales["agreed_delivery_date"], format="%Y-%m-%d"
+ )
+ df_sales["agreed_payment_date"] = pd.to_datetime(
+ df_sales["agreed_payment_date"], format="%Y-%m-%d"
+ )
+ df_sales = df_sales.drop(labels=["created_at", "last_updated"], axis=1)
df_sales.reset_index(inplace=True)
return df_sales
@@ -66,7 +70,6 @@ def create_fact_sales_order(dict_of_df):
def create_fact_purchase_orders(dict_of_df):
df_po=dict_of_df["purchase_order"]
df_po.index.name="purchase_record_id"
-<< << << < HEAD
df_po["created_date"]=df_po["created_at"].astype("datetime64[ns]").dt.date
df_po["created_time"]=(
df_po["created_at"].astype("datetime64[ns]").dt.floor("s").dt.time
@@ -75,17 +78,7 @@ def create_fact_purchase_orders(dict_of_df):
"datetime64[ns]").dt.date
df_po["last_updated_time"]=(
df_po["last_updated"].astype("datetime64[ns]").dt.floor("s").dt.time
-== == == =
- df_po["created_date"]=pd.to_datetime(
- df_po["created_at"], format="%Y-%m-%d")
- df_po["created_time"]=pd.to_datetime(
- df_po["created_at"], format="%H-%M-%S")
- df_po["last_updated_date"]=pd.to_datetime(
- df_po["last_updated"], format="%Y-%m-%d"
- )
- df_po["last_updated_time"]=pd.to_datetime(
- df_po["last_updated"], format="%H-%M-%S"
->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter)
+
)
df_po["agreed_delivery_date"]=pd.to_datetime(
df_po["agreed_delivery_date"], format="%Y-%m-%d"
@@ -93,11 +86,7 @@ def create_fact_purchase_orders(dict_of_df):
df_po["agreed_payment_date"]=pd.to_datetime(
df_po["agreed_payment_date"], format="%Y-%m-%d"
)
-<< << << < HEAD
df_po=df_po.drop(labels=["created_at", "last_updated"], axis=1)
-== == == =
- df_po.drop(labels=["created_at", "last_updated"], axis=1, inplace=True)
->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter)
df_po.reset_index(inplace=True)
return df_po
@@ -108,7 +97,6 @@ def create_fact_purchase_orders(dict_of_df):
def create_fact_payment(dict_of_df):
df_payment=dict_of_df["payment"]
df_payment.index.name="payment_record_id"
-<< << << < HEAD
df_payment["created_date"]=(
df_payment["created_at"].astype("datetime64[ns]").dt.date
)
@@ -121,29 +109,12 @@ def create_fact_payment(dict_of_df):
df_payment["last_updated_time"]=(
df_payment["last_updated"].astype(
"datetime64[ns]").dt.floor("s").dt.time
-== == == =
- df_payment["created_date"]=pd.to_datetime(
- df_payment["created_at"], format="%Y-%m-%d"
- )
- df_payment["created_time"]=pd.to_datetime(
- df_payment["created_at"], format="%H-%M-%S"
- )
- df_payment["last_updated_date"]=pd.to_datetime(
- df_payment["last_updated"], format="%Y-%m-%d"
- )
- df_payment["last_updated_time"]=pd.to_datetime(
- df_payment["last_updated"], format="%H-%M-%S"
->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter)
)
df_payment["payment_date"]=pd.to_datetime(
df_payment["payment_date"], format="%Y-%m-%d"
)
-<< << << < HEAD
df_payment=df_payment.drop(labels=["created_at", "last_updated"], axis=1)
-== == == =
- df_payment.drop(
- labels=["created_at", "last_updated"], axis=1, inplace=True)
->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter)
+
df_payment.reset_index(inplace=True)
return df_payment
@@ -196,14 +167,10 @@ def create_dim_date(dict_of_df):
create_fact_purchase_orders(dict_of_df),
create_fact_sales_order(dict_of_df),
]
- list_of_date_columns=[]
+ list_of_date_columns = []
for df in fact_dfs:
- date_col_names=[
-<< << << < HEAD
+ date_col_names = [
col_name for col_name in list(df.columns) if "_date" in col_name
-== == == =
- col_name for col_name in list(df.columns) if "date" in col_name
->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter)
]
for col in date_col_names:
list_of_date_columns.append(df[col])
git.ajschof.me — hosted by ajschofield — powered by cgit