blob: 53c0148cbd8b4e1c36d63443fb0bc3f7abb22b09 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
from src.transform_lambda import read_from_s3_subfolder_to_df, tables
from src.extract_lambda import extract_bucket
import json
import boto3
import re
import pandas as pd
# Read the extracted tables from S3; the result is indexed by table name below.
dict_of_df = read_from_s3_subfolder_to_df(
    tables,
    extract_bucket(),
    client=boto3.client("s3"),
)

# Bind each source table used by the transformations to its own module-level
# name. A table missing from dict_of_df raises KeyError here.
(
    df_staff,           # original note: no change
    df_currency,        # original note: scraping API
    df_counterparty,
    df_address,
    df_department,
    df_purchase_order,
) = (
    dict_of_df[table_name]
    for table_name in (
        'staff',
        'currency',
        'counterparty',
        'address',
        'department',
        'purchase_order',
    )
)
## dim_staff: the table is the same across the schemas (no change).
## dim_location: built from df_address by renaming 'address_id' to
## 'location_id' and dropping the 'created_at' and 'last_updated' columns.
dim_location = (
    df_address
    .rename(columns={'address_id': 'location_id'})
    .drop(columns=['created_at', 'last_updated'])
)
## dim_counterparty
# Prefix every address column with 'counterparty_legal_' so the address fields
# are distinguishable once they sit next to the counterparty columns.
# Columns are already the default axis for DataFrame.add_prefix, so no `axis`
# argument is passed (that keyword only exists in pandas >= 2.0).
df_prefixed_address = df_address.add_prefix('counterparty_legal_')

# add_prefix renames the join key as well: 'address_id' is now
# 'counterparty_legal_address_id', so right_on must use the prefixed name
# (joining on the bare 'address_id' raises KeyError).
# The result is bound to a name so the merged table is not thrown away.
# NOTE(review): how="outer" also keeps address rows that no counterparty
# references (their counterparty columns will be NaN) -- confirm that an
# outer join rather than a left join is intended for this dimension.
dim_counterparty = pd.merge(
    df_counterparty,
    df_prefixed_address,
    left_on="legal_address_id",
    right_on="counterparty_legal_address_id",
    how="outer",
)
|