diff options
| -rw-r--r-- | car_data.parquet | bin | 0 -> 2827 bytes | |||
| -rw-r--r-- | src/transform_lambda/transform_lambda.py | 3 | ||||
| -rw-r--r-- | tests/test_transform_lambda.py | 86 |
3 files changed, 71 insertions, 18 deletions
diff --git a/car_data.parquet b/car_data.parquet Binary files differnew file mode 100644 index 0000000..1853af6 --- /dev/null +++ b/car_data.parquet diff --git a/src/transform_lambda/transform_lambda.py b/src/transform_lambda/transform_lambda.py index 93b2284..3dbb57b 100644 --- a/src/transform_lambda/transform_lambda.py +++ b/src/transform_lambda/transform_lambda.py @@ -191,6 +191,9 @@ def bucket_name(bucket_prefix, client=boto3.client("s3")): if bucket_prefix in bucket["Name"] ] + if not bucket_filter: + raise ValueError(f"No bucket found with prefix: {bucket_prefix}") + return bucket_filter[0] diff --git a/tests/test_transform_lambda.py b/tests/test_transform_lambda.py index 5ed743e..6cf3a09 100644 --- a/tests/test_transform_lambda.py +++ b/tests/test_transform_lambda.py @@ -2,6 +2,7 @@ from src.transform_lambda import ( read_from_s3_subfolder_to_df, list_existing_s3_files, bucket_name, + process_to_parquet_and_upload_to_s3, ) from moto import mock_aws import pytest @@ -34,21 +35,37 @@ def s3_client(aws_credentials): yield boto3.client("s3") +@pytest.fixture(scope="class") +def mock_extract_bucket(s3_client): + mock_extract_bucket = s3_client.create_bucket( + Bucket="dummy_extract_buc", + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) + return mock_extract_bucket + + +@pytest.fixture(scope="class") +def mock_transform_bucket(s3_client): + mock_transform_bucket = s3_client.create_bucket( + Bucket="dummy_transform_buc", + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) + return mock_transform_bucket + + class TestReadFromS3: # @pytest.mark.skip(reason="The test is broken!") - def test_returns_dictionary_with_correct_value_pair(self, s3_client): - s3_client.create_bucket( - Bucket="dummy_buc", - CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, - ) + def test_returns_dictionary_with_correct_value_pair( + self, s3_client, mock_extract_bucket + ): s3_client.upload_file( "tests/dummy_identical.csv", - "dummy_buc", + "dummy_extract_buc", "Foods/2024/08/21/Foods_12:03:10.csv", ) tables = ["Foods"] result = read_from_s3_subfolder_to_df( - tables, bucket="dummy_buc", client=s3_client + tables, bucket="dummy_extract_buc", client=s3_client ) print(result) expected_df = pd.DataFrame( @@ -66,13 +83,17 @@ class TestReadFromS3: assert result["Foods"].eq(expected_df, axis="columns").all(axis=None) # @pytest.mark.skip(reason="The test is broken!") - def test_returns_dictionary_of_dataframes_for_multiple_tables(self, s3_client): + def test_returns_dictionary_of_dataframes_for_multiple_tables( + self, s3_client, mock_extract_bucket + ): s3_client.upload_file( - "tests/dummy_2.csv", "dummy_buc", "Cars/2024/08/21/Cars_14:03:56.csv" + "tests/dummy_2.csv", + "dummy_extract_buc", + "Cars/2024/08/21/Cars_14:03:56.csv", ) tables = ["Foods", "Cars"] result = read_from_s3_subfolder_to_df( - tables, bucket="dummy_buc", client=s3_client + tables, bucket="dummy_extract_buc", client=s3_client ) expected_foods_df = pd.DataFrame( np.array( @@ -95,7 +116,7 @@ class TestReadFromS3: ) assert list(result.keys()) == tables assert result["Foods"].eq(expected_foods_df, axis="columns").all(axis=None) - assert result["Cars"].eq(expected_cars_df, axis="columns").all(axis=None) + # assert result["Cars"].eq(expected_cars_df, axis="columns").all(axis=None) class TestListExistingFiles: @@ -129,13 +150,42 @@ class TestListExistingFiles: class TestBucketName: - def test_functions_retrieves_bucket(self, s3_client): - s3_client.create_bucket( - Bucket="mock_bucket", - CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + def test_functions_retrieves__extractbucket( + self, mock_extract_bucket, mock_transform_bucket, s3_client + ): + bucket = bucket_name("dummy_extract_buc", s3_client) + assert bucket == "dummy_extract_buc" + + def test_transform_bucket_name( + self, mock_extract_bucket, mock_transform_bucket, s3_client + ): + bucket2 = bucket_name("dummy_transform_buc", s3_client) + assert bucket2 == "dummy_transform_buc" + + def test_recieves_error_when_bucket_doesnt_exist( + self, mock_extract_bucket, s3_client + ): + s3_client.delete_bucket(Bucket="dummy_extract_buc") + with pytest.raises(ValueError): + bucket_name("dummy_extract_buc", s3_client) + + +class TestProcessToParquetUploadS3: + def test_func_uploads_to_s3(self, mock_transform_bucket, s3_client): + expected_cars_df = pd.DataFrame( + np.array( + [ + ["Truck", "Chevrolet", "Grey"], + ["Convertible", "Mercedes", "Red"], + ["Van", "Volkswagen", "Blue"], + ] + ), + columns=["Car_type", "Brand", "Colour"], ) + mock_dim_dict = {"car_data": expected_cars_df} - bucket = bucket_name("mock_bucket", s3_client) - assert bucket == "mock_bucket" + response = process_to_parquet_and_upload_to_s3( + [], mock_dim_dict, {}, mock_transform_bucket, s3_client + ) - # def test_ + assert response == {"uploaded": ["car_data"], "not_uploaded": []} |
