"""Tests for ``read_from_s3_subfolder_to_df`` run against a moto-mocked S3 bucket."""

import os

import boto3
import numpy as np
import pandas as pd
import pytest
from moto import mock_aws

from src.transform_lambda import read_from_s3_subfolder_to_df

# Shared test constants, hoisted so the tests and fixtures cannot drift apart.
BUCKET = 'dummy_buc'
REGION = 'eu-west-2'
FOODS_CSV = 'tests/dummy_identical.csv'
FOODS_KEY = 'Foods/2024/08/21/Foods_12:03:10.csv'
CARS_CSV = 'tests/dummy_2.csv'
CARS_KEY = 'Cars/2024/08/21/Cars_14:03:56.csv'


def _expected_foods_df():
    """Build the DataFrame the tests expect for the 'Foods' table."""
    return pd.DataFrame(
        np.array([
            ['Vegetable', 'Sour', 'Green'],
            ['Berry', 'Sweet', 'Red'],
        ]),
        columns=['Food_type', 'Flavour', 'Colour'],
    )


def _expected_cars_df():
    """Build the DataFrame the tests expect for the 'Cars' table."""
    return pd.DataFrame(
        np.array([
            ['Truck', 'Chevrolet', 'Grey'],
            ['Convertible', 'Mercedes', 'Red'],
            ['Van', 'Volkswagen', 'Blue'],
        ]),
        columns=['Car_type', 'Brand', 'Colour'],
    )


def _frames_match(actual, expected):
    """Return a truthy value when every cell of *actual* equals *expected*."""
    return actual.eq(expected, axis='columns').all(axis=None)


@pytest.fixture(scope='class')
def aws_credentials():
    """Set dummy AWS credentials and region in the environment for boto3."""
    os.environ["AWS_ACCESS_KEY_ID"] = 'testing'
    os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing'
    # Was misspelled "AWS_SECURIT_TOKEN", which left the real variable unset.
    os.environ["AWS_SECURITY_TOKEN"] = 'testing'
    os.environ["AWS_SESSION_TOKEN"] = 'testing'
    os.environ["AWS_DEFAULT_REGION"] = REGION


@pytest.fixture(scope='class')
def s3_client(aws_credentials):
    """Yield a boto3 S3 client created inside an active ``mock_aws`` context."""
    with mock_aws():
        yield boto3.client('s3')


@pytest.fixture(scope='class')
def s3_bucket(s3_client):
    """Create the test bucket once per class and return its name.

    Creating the bucket here (rather than inside the first test) lets every
    test run on its own or in any order.
    """
    s3_client.create_bucket(
        Bucket=BUCKET,
        CreateBucketConfiguration={'LocationConstraint': REGION},
    )
    return BUCKET


class TestReadFromS3:
    """Behaviour of ``read_from_s3_subfolder_to_df`` for one and many tables."""

    def test_returns_dictionary_with_correct_value_pair(self, s3_client, s3_bucket):
        """A single table yields a dict mapping its name to a DataFrame."""
        s3_client.upload_file(FOODS_CSV, s3_bucket, FOODS_KEY)
        tables = ['Foods']

        result = read_from_s3_subfolder_to_df(
            tables, bucket=s3_bucket, client=s3_client
        )

        assert isinstance(result, dict)
        assert list(result.keys())[0] == 'Foods'
        assert isinstance(result['Foods'], pd.DataFrame)
        assert _frames_match(result['Foods'], _expected_foods_df())

    def test_returns_dictionary_of_dataframes_for_multiple_tables(
        self, s3_client, s3_bucket
    ):
        """Several tables yield one DataFrame each, keyed in request order."""
        # Upload Foods here as well instead of relying on the previous test
        # having done so; re-uploading to the same key just overwrites it.
        s3_client.upload_file(FOODS_CSV, s3_bucket, FOODS_KEY)
        s3_client.upload_file(CARS_CSV, s3_bucket, CARS_KEY)
        tables = ['Foods', 'Cars']

        result = read_from_s3_subfolder_to_df(
            tables, bucket=s3_bucket, client=s3_client
        )

        assert list(result.keys()) == tables
        assert _frames_match(result['Foods'], _expected_foods_df())
        assert _frames_match(result['Cars'], _expected_cars_df())