1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
|
from src.transform_lambda import read_from_s3_subfolder_to_df
from moto import mock_aws
import pytest
import pandas as pd
import os
import boto3
import numpy as np
@pytest.fixture(scope="class")
def aws_credentials():
os.environ["AWS_ACCESS_KEY_ID"] = "testing"
os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
os.environ["AWS_SECURIT_TOKEN"] = "testing"
os.environ["AWS_SESSION_TOKEN"] = "testing"
os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
@pytest.fixture(scope="class")
def s3_client(aws_credentials):
with mock_aws():
yield boto3.client("s3")
class TestReadFromS3:
def test_returns_dictionary_with_correct_value_pair(self, s3_client):
s3_client.create_bucket(
Bucket="dummy_buc",
CreateBucketConfiguration={"LocationConstraint": "eu-west-2"},
)
s3_client.upload_file(
"tests/dummy_identical.csv",
"dummy_buc",
"Foods/2024/08/21/Foods_12:03:10.csv",
)
tables = ["Foods"]
result = read_from_s3_subfolder_to_df(
tables, bucket="dummy_buc", client=s3_client
)
print(result)
expected_df = pd.DataFrame(
np.array([["Vegetable", "Sour", "Green", "2022-11-03 14:20:49.962"], ["Berry", "Sweet", "Red", "2022-11-03 14:20:49.962"]]),
columns=["Food_type", "Flavour", "Colour", "last_updated"],
)
assert isinstance(result, dict)
assert list(result.keys())[0] == "Foods"
assert isinstance(result["Foods"], pd.DataFrame)
assert result["Foods"].eq(expected_df, axis="columns").all(axis=None)
def test_returns_dictionary_of_dataframes_for_multiple_tables(self, s3_client):
s3_client.upload_file(
"tests/dummy_2.csv", "dummy_buc", "Cars/2024/08/21/Cars_14:03:56.csv"
)
tables = ["Foods", "Cars"]
result = read_from_s3_subfolder_to_df(
tables, bucket="dummy_buc", client=s3_client
)
expected_foods_df = pd.DataFrame(
np.array([["Vegetable", "Sour", "Green", "2022-11-03 14:20:49.962"], ["Berry", "Sweet", "Red", "2022-11-03 14:20:49.962"]]),
columns=["Food_type", "Flavour", "Colour", "last_updated"],
)
expected_cars_df = pd.DataFrame(
np.array(
[
["Truck", "Chevrolet", "Grey"],
["Convertible", "Mercedes", "Red"],
["Van", "Volkswagen", "Blue"],
]
),
columns=["Car_type", "Brand", "Colour"],
)
assert list(result.keys()) == tables
assert result["Foods"].eq(expected_foods_df, axis="columns").all(axis=None)
assert result["Cars"].eq(expected_cars_df, axis="columns").all(axis=None)
|