src/fact-purchase-table.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41

from src.transform_lambda import read_from_s3_subfolder_to_df, tables
from src.extract_lambda import extract_bucket
import json
import boto3
import re
import pandas as pd


# Pulls each table's dataframe out of the dictionary of dataframes, keyed by table name
def get_dfs_from_dict(tables, dictionary=None):
    """Return the dataframe for each requested table name.

    Args:
        tables: Iterable of table names to look up (for example 'staff',
            'currency', 'counterparty', 'address', 'department',
            'purchase_order').
        dictionary: Mapping of table name to dataframe. It must be supplied;
            the ``None`` default only keeps the two-parameter signature.

    Returns:
        A dict mapping every name in ``tables`` to its dataframe, in the
        order the names were given.

    Raises:
        ValueError: If ``dictionary`` is not supplied.
        KeyError: If a requested table is absent from ``dictionary``.
    """
    if dictionary is None:
        raise ValueError("a dictionary of dataframes must be supplied")

    # Materialise once so a generator of names can be scanned twice.
    requested = list(tables)

    # Report every missing table in one go instead of failing on the first.
    missing = [name for name in requested if name not in dictionary]
    if missing:
        raise KeyError(f"tables missing from dictionary: {missing}")

    return {name: dictionary[name] for name in requested}

## dim_staff table is the same across the schemas (no change)

## dim_location from address --> drops 2 columns
def create_dim_location(dict_of_df):
    """Build the dim_location frame from the 'address' dataframe.

    The 'created_at' and 'last_updated' columns are dropped and
    'address_id' is renamed to 'location_id'. The input dataframe is not
    modified; a new dataframe is returned.
    """
    address = dict_of_df['address']
    without_audit_columns = address.drop(columns=['created_at', 'last_updated'])
    return without_audit_columns.rename(columns={'address_id': 'location_id'})

## dim_counterparty from address and counterparty
def create_dim_counterparty(dict_of_df):
    """Join each counterparty to its legal address.

    Every column of the 'address' dataframe is prefixed with
    'counterparty_legal_' and the result is merged onto the 'counterparty'
    dataframe, matching 'legal_address_id' against the prefixed address key.

    Args:
        dict_of_df: Mapping that contains the 'counterparty' and 'address'
            dataframes.

    Returns:
        The merged dataframe. Neither input dataframe is modified.
    """
    prefix = 'counterparty_legal_'
    # add_prefix renames columns by default for a DataFrame, so no axis
    # argument is needed (and older pandas versions do not accept one).
    legal_address = dict_of_df['address'].add_prefix(prefix)

    # The address key has been renamed by the prefix, so the join must use
    # the prefixed name rather than the bare 'address_id'.
    # NOTE(review): how="outer" also keeps addresses that no counterparty
    # references (with empty counterparty columns) - confirm this is wanted.
    return pd.merge(
        dict_of_df['counterparty'],
        legal_address,
        left_on="legal_address_id",
        right_on=f"{prefix}address_id",
        how="outer",
    )

def create_fact_purchase_order(dict_of_df):
    """Build the fact_purchase_order frame from the 'purchase_order' dataframe.

    The index is named 'purchase_record_id' and the 'agreed_delivery_date'
    and 'agreed_payment_date' columns are converted to calendar dates.

    NOTE(review): the original left the right-hand side of the
    'agreed_delivery_date' assignment blank; converting both agreed_*
    columns to dates is an assumption - confirm against the target schema.
    TODO: the split of the creation timestamp into separate date and time
    columns was only sketched in commented-out code and is still pending.

    Args:
        dict_of_df: Mapping that contains the 'purchase_order' dataframe.

    Returns:
        A new dataframe; the dataframe held in ``dict_of_df`` is not modified.
    """
    # Work on a copy so the caller's dataframe keeps its index name and values.
    fact_po = dict_of_df['purchase_order'].copy()
    fact_po.index.name = 'purchase_record_id'

    for column in ('agreed_delivery_date', 'agreed_payment_date'):
        fact_po[column] = pd.to_datetime(fact_po[column]).dt.date

    return fact_po