aboutsummaryrefslogtreecommitdiffstats
path: root/src/transform_lambda.py
blob: b176ccc789094acfc7e6415de0d86b3cfc1fa974 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# from src.extract_lambda import extract_bucket
import json
import boto3
import re
import io
from io import StringIO
import pandas as pd


def lambda_handler(event, context):
    """Lambda entry point; currently a placeholder that does nothing.

    Both ``event`` and ``context`` are accepted but ignored, and the
    function always returns ``None``.
    """
    return None


# Names of the source tables known to this module.
# NOTE(review): presumably intended as the ``tables`` argument of
# read_from_s3_subfolder_to_df, which uses each name as an S3 key prefix —
# confirm against the caller.
tables = [
    "sales_order",
    "transaction",
    "payment",
    "counterparty",
    "address",
    "staff",
    "purchase_order",
    "department",
    "currency",
    "design",
    "payment_type",
]


def read_from_s3_subfolder_to_df(tables, bucket, client=None):
    """Load every CSV object under each table's S3 subfolder into a DataFrame.

    For each name in ``tables`` the objects whose key starts with
    ``"<table>/"`` are listed, each one is read with ``pandas.read_csv`` via
    its ``s3://`` URL, and the resulting frames are concatenated.

    Args:
        tables: Iterable of table names; each is used as a key prefix
            ("subfolder") inside ``bucket``.
        bucket: Name of the S3 bucket to read from.
        client: S3 client used for listing objects. When ``None`` (the
            default) a client is created with ``boto3.client("s3")`` at call
            time rather than at import time. Note that the CSV contents are
            fetched by pandas from the ``s3://`` URL, not through this client.

    Returns:
        dict mapping each table name to a DataFrame holding the concatenation
        of all CSVs found under that table's prefix. A table with no objects
        maps to an empty DataFrame.
    """
    # Create the client lazily: a default of ``boto3.client("s3")`` in the
    # signature would run once at import and require AWS config just to import.
    if client is None:
        client = boto3.client("s3")

    table_dfs = {}
    for table in tables:
        # The trailing slash keeps e.g. "payment" from also matching keys
        # under "payment_type/".
        prefix = table if table.endswith("/") else f"{table}/"
        request = {"Bucket": bucket, "Prefix": prefix}
        frames = []

        # list_objects_v2 returns at most one page (up to 1000 keys) per call,
        # so keep following the continuation token until the listing is done.
        while True:
            response = client.list_objects_v2(**request)
            # "Contents" is omitted entirely when nothing matches the prefix.
            for entry in response.get("Contents", []):
                key = entry["Key"]
                # Skip zero-byte "folder" placeholder objects; they are not CSVs.
                if key.endswith("/"):
                    continue
                frames.append(pd.read_csv(f"s3://{bucket}/{key}"))
            if not response.get("IsTruncated"):
                break
            request["ContinuationToken"] = response["NextContinuationToken"]

        # pd.concat raises on an empty list, so fall back to an empty frame.
        table_dfs[table] = pd.concat(frames) if frames else pd.DataFrame()
    return table_dfs
git.ajschof.me — hosted by ajschofield — powered by cgit