blob: 91f507783e8b41bdd3d403ee71f1dbe1bbd0364c (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
|
from src.transform_lambda import read_from_s3_subfolder_to_df, tables
from src.extract_lambda import extract_bucket
import json
import boto3
import re
import pandas as pd
def get_dfs_from_dict(tables, dictionary=None):
    """Pick the DataFrames for the requested tables out of a lookup mapping.

    Args:
        tables: Iterable of table names to look up.
        dictionary: Mapping of table name to DataFrame. It must be supplied;
            the ``None`` default only keeps the parameter optional in the
            signature, as it was before.

    Returns:
        A dict mapping each name in ``tables`` to the matching entry of
        ``dictionary``, in the order the names were given.

    Raises:
        ValueError: If ``dictionary`` is not supplied.
        KeyError: If a requested table name is missing from ``dictionary``.
    """
    # The previous default referenced a module-level ``dict_of_df`` that is
    # not defined anywhere in this module, which raised NameError as soon as
    # the ``def`` statement was executed. Fail with a clear message instead.
    if dictionary is None:
        raise ValueError("dictionary of DataFrames must be provided")

    # Plain indexing (not .get) so a missing table surfaces as a KeyError
    # naming the table rather than silently yielding None.
    return {table: dictionary[table] for table in tables}
## dim_staff table is the same across the schemas (no change)
## dim_location from address --> drops 2 columns
def create_dim_location(dict_of_df):
    """Build the dim_location table from the 'address' DataFrame.

    The 'created_at' and 'last_updated' columns are removed and the
    'address_id' column is renamed to 'location_id'. A new DataFrame is
    returned; the frame stored in ``dict_of_df`` is not modified.
    """
    address = dict_of_df['address']
    trimmed = address.drop(columns=['created_at', 'last_updated'])
    return trimmed.rename(columns={'address_id': 'location_id'})
## dim_counterparty from address and counterparty
def create_dim_counterparty(dict_of_df):
    """Build the dim_counterparty table by joining counterparties to addresses.

    Every column of the 'address' DataFrame is prefixed with
    'counterparty_legal_' and the result is merged onto the 'counterparty'
    DataFrame, matching ``legal_address_id`` against the (prefixed) address id.

    Args:
        dict_of_df: Mapping that holds the 'counterparty' and 'address'
            DataFrames.

    Returns:
        The merged DataFrame. Neither input frame is modified.
    """
    # add_prefix on a DataFrame prefixes the column labels by default, so no
    # axis argument is needed (and omitting it works on older pandas too).
    prefixed_address = dict_of_df['address'].add_prefix('counterparty_legal_')

    # The prefix also applies to the key column, so the right-hand join key
    # is 'counterparty_legal_address_id', not 'address_id'.
    # NOTE(review): how="outer" is kept from the original; it also emits rows
    # for addresses no counterparty points at -- confirm whether "left" is
    # what the dimension table actually needs.
    return pd.merge(
        dict_of_df['counterparty'],
        prefixed_address,
        left_on='legal_address_id',
        right_on='counterparty_legal_address_id',
        how='outer',
    )
def create_fact_purchase_order(dict_of_df):
    """Start building the fact_purchase_order table from 'purchase_order'.

    The index of the purchase-order DataFrame is named 'purchase_record_id'.
    All columns are passed through unchanged.

    Args:
        dict_of_df: Mapping that holds the 'purchase_order' DataFrame.

    Returns:
        A new DataFrame with the renamed index; the frame stored in
        ``dict_of_df`` is not modified.
    """
    # rename_axis returns a new frame, so the caller's DataFrame keeps its
    # original index name.
    fact_po = dict_of_df['purchase_order'].rename_axis('purchase_record_id')

    # TODO: the draft of this function stopped at an unfinished assignment to
    # 'agreed_delivery_date' (a syntax error) next to a bare reference to
    # 'agreed_payment_date', and had commented-out lines deriving
    # 'create_date' / 'create_time' from a 'create_at' column. The intended
    # date/time transformation is not determinable from this file, so those
    # columns are left as they arrive -- fill in once the target schema is
    # confirmed.
    return fact_po
|