diff options
| author | Ellie <167526066+ellsymonds@users.noreply.github.com> | 2024-08-28 09:36:20 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-08-28 09:36:20 +0100 |
| commit | d0b0fa9ff13d0739014cb8a42887f2d6fe11ae3f (patch) | |
| tree | 7f743a3f070f453d2f11b8a5103b0866e169d1af /src/dataframes.py | |
| parent | 95935534931b5ff6e617ba74c86cb7a6718128e4 (diff) | |
| parent | 572617d1c33646f2c58fad0c2859835542b2829f (diff) | |
| download | de-project-bentley-d0b0fa9ff13d0739014cb8a42887f2d6fe11ae3f.tar.gz de-project-bentley-d0b0fa9ff13d0739014cb8a42887f2d6fe11ae3f.zip | |
Merge branch 'development' into feature/load-lambda-tests
Diffstat (limited to 'src/dataframes.py')
| -rw-r--r-- | src/dataframes.py | 85 |
1 files changed, 26 insertions, 59 deletions
diff --git a/src/dataframes.py b/src/dataframes.py index 43facd6..ab32fff 100644 --- a/src/dataframes.py +++ b/src/dataframes.py @@ -20,9 +20,7 @@ import requests def create_fact_sales_order(dict_of_df): df_sales = dict_of_df["sales_order"] df_sales.index.name = "sales_record_id" - - -<< << << < HEAD + df_sales["created_date"] = df_sales["created_at"].astype( "datetime64[ns]").dt.date df_sales["created_time"] = ( @@ -33,17 +31,6 @@ def create_fact_sales_order(dict_of_df): ) df_sales["last_updated_time"] = ( df_sales["last_updated"].astype("datetime64[ns]").dt.floor("s").dt.time -== == == = - df_sales["created_date"]=pd.to_datetime( - df_sales["created_at"], format="%Y-%m-%d") - df_sales["created_time"]=pd.to_datetime( - df_sales["created_at"], format="%H-%M-%S") - df_sales["last_updated_date"]=pd.to_datetime( - df_sales["last_updated"], format="%Y-%m-%d" - ) - df_sales["last_updated_time"]=pd.to_datetime( - df_sales["last_updated"], format="%H-%M-%S" ->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter) ) df_sales["agreed_delivery_date"]=pd.to_datetime( df_sales["agreed_delivery_date"], format="%Y-%m-%d" @@ -51,11 +38,28 @@ def create_fact_sales_order(dict_of_df): df_sales["agreed_payment_date"]=pd.to_datetime( df_sales["agreed_payment_date"], format="%Y-%m-%d" ) -<< << << < HEAD df_sales=df_sales.drop(labels=["created_at", "last_updated"], axis=1) -== == == = - df_sales.drop(labels=["created_at", "last_updated"], axis=1, inplace=True) ->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter) + + df_sales.reset_index(inplace=True) + return df_sales + + df_sales["created_date"] = df_sales["created_at"].astype("datetime64[ns]").dt.date + df_sales["created_time"] = ( + df_sales["created_at"].astype("datetime64[ns]").dt.floor("s").dt.time + ) + df_sales["last_updated_date"] = ( + df_sales["last_updated"].astype("datetime64[ns]").dt.date + ) + df_sales["last_updated_time"] = ( + df_sales["last_updated"].astype("datetime64[ns]").dt.floor("s").dt.time + ) + df_sales["agreed_delivery_date"] = pd.to_datetime( + df_sales["agreed_delivery_date"], format="%Y-%m-%d" + ) + df_sales["agreed_payment_date"] = pd.to_datetime( + df_sales["agreed_payment_date"], format="%Y-%m-%d" + ) + df_sales = df_sales.drop(labels=["created_at", "last_updated"], axis=1) df_sales.reset_index(inplace=True) return df_sales @@ -66,7 +70,6 @@ def create_fact_sales_order(dict_of_df): def create_fact_purchase_orders(dict_of_df): df_po=dict_of_df["purchase_order"] df_po.index.name="purchase_record_id" -<< << << < HEAD df_po["created_date"]=df_po["created_at"].astype("datetime64[ns]").dt.date df_po["created_time"]=( df_po["created_at"].astype("datetime64[ns]").dt.floor("s").dt.time @@ -75,17 +78,7 @@ def create_fact_purchase_orders(dict_of_df): "datetime64[ns]").dt.date df_po["last_updated_time"]=( df_po["last_updated"].astype("datetime64[ns]").dt.floor("s").dt.time -== == == = - df_po["created_date"]=pd.to_datetime( - df_po["created_at"], format="%Y-%m-%d") - df_po["created_time"]=pd.to_datetime( - df_po["created_at"], format="%H-%M-%S") - df_po["last_updated_date"]=pd.to_datetime( - df_po["last_updated"], format="%Y-%m-%d" - ) - df_po["last_updated_time"]=pd.to_datetime( - df_po["last_updated"], format="%H-%M-%S" ->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter) + ) df_po["agreed_delivery_date"]=pd.to_datetime( df_po["agreed_delivery_date"], format="%Y-%m-%d" @@ -93,11 +86,7 @@ def create_fact_purchase_orders(dict_of_df): df_po["agreed_payment_date"]=pd.to_datetime( df_po["agreed_payment_date"], format="%Y-%m-%d" ) -<< << << < HEAD df_po=df_po.drop(labels=["created_at", "last_updated"], axis=1) -== == == = - df_po.drop(labels=["created_at", "last_updated"], axis=1, inplace=True) ->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter) df_po.reset_index(inplace=True) return df_po @@ -108,7 +97,6 @@ def create_fact_purchase_orders(dict_of_df): def create_fact_payment(dict_of_df): df_payment=dict_of_df["payment"] df_payment.index.name="payment_record_id" -<< << << < HEAD df_payment["created_date"]=( df_payment["created_at"].astype("datetime64[ns]").dt.date ) @@ -121,29 +109,12 @@ def create_fact_payment(dict_of_df): df_payment["last_updated_time"]=( df_payment["last_updated"].astype( "datetime64[ns]").dt.floor("s").dt.time -== == == = - df_payment["created_date"]=pd.to_datetime( - df_payment["created_at"], format="%Y-%m-%d" - ) - df_payment["created_time"]=pd.to_datetime( - df_payment["created_at"], format="%H-%M-%S" - ) - df_payment["last_updated_date"]=pd.to_datetime( - df_payment["last_updated"], format="%Y-%m-%d" - ) - df_payment["last_updated_time"]=pd.to_datetime( - df_payment["last_updated"], format="%H-%M-%S" ->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter) ) df_payment["payment_date"]=pd.to_datetime( df_payment["payment_date"], format="%Y-%m-%d" ) -<< << << < HEAD df_payment=df_payment.drop(labels=["created_at", "last_updated"], axis=1) -== == == = - df_payment.drop( - labels=["created_at", "last_updated"], axis=1, inplace=True) ->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter) + df_payment.reset_index(inplace=True) return df_payment @@ -196,14 +167,10 @@ def create_dim_date(dict_of_df): create_fact_purchase_orders(dict_of_df), create_fact_sales_order(dict_of_df), ] - list_of_date_columns=[] + list_of_date_columns = [] for df in fact_dfs: - date_col_names=[ -<< << << < HEAD + date_col_names = [ col_name for col_name in list(df.columns) if "_date" in col_name -== == == = - col_name for col_name in list(df.columns) if "date" in col_name ->> >>>> > 5db3f61(style: format code with Autopep8, Black and Ruff Formatter) ] for col in date_col_names: list_of_date_columns.append(df[col]) |
