# src/fact-purchase-table.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34

from src.transform_lambda import read_from_s3_subfolder_to_df, tables
from src.extract_lambda import extract_bucket
import json
import boto3
import re
import pandas as pd


# Load the source tables from the extract bucket. The result is indexed by
# table name below. This runs at import time and talks to S3.
dict_of_df = read_from_s3_subfolder_to_df(
    tables,
    extract_bucket(),
    client=boto3.client("s3"),
)


# Bind each source table used by the transformations to its own name.
(
    df_staff,  # no change
    df_currency,  # scraping API
    df_counterparty,
    df_address,
    df_department,
    df_purchase_order,
) = (
    dict_of_df[table_name]
    for table_name in (
        "staff",
        "currency",
        "counterparty",
        "address",
        "department",
        "purchase_order",
    )
)

## dim_staff table is the same across the schemas (no change)

## dim_counterparty table

## dim_location: df_address with address_id renamed to location_id and the
## created_at / last_updated columns removed
dim_location = (
    df_address
    .rename(columns={"address_id": "location_id"})
    .drop(columns=["created_at", "last_updated"])
)

## dim_counterparty
# Prefix every address column so that, once joined, the address fields read
# as the counterparty's legal address. This renames the join key as well:
# address_id -> counterparty_legal_address_id.
# add_prefix prefixes column labels by default on a DataFrame; the `axis`
# keyword only exists in pandas >= 2.0, so it is left out for compatibility.
df_prefixed_address = df_address.add_prefix("counterparty_legal_")

# pd.merge returns a new DataFrame and leaves its inputs untouched, so the
# result has to be bound to a name. The right-hand key must be the prefixed
# column, because "address_id" no longer exists on df_prefixed_address.
# NOTE(review): how="outer" also keeps addresses that no counterparty points
# at (rows with missing counterparty fields) -- confirm whether how="left"
# is what the dimension table needs.
dim_counterparty = pd.merge(
    df_counterparty,
    df_prefixed_address,
    left_on="legal_address_id",
    right_on="counterparty_legal_address_id",
    how="outer",
)