From f9f1ebc3eb7a9d4f312db5c1402a0197e0777b29 Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Wed, 28 Aug 2024 12:42:29 +0100 Subject: wip: testing the process helper function --- tests/test_transform_lambda.py | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/tests/test_transform_lambda.py b/tests/test_transform_lambda.py index 6cf3a09..3d6e82a 100644 --- a/tests/test_transform_lambda.py +++ b/tests/test_transform_lambda.py @@ -1,17 +1,13 @@ -from src.transform_lambda import ( - read_from_s3_subfolder_to_df, - list_existing_s3_files, - bucket_name, - process_to_parquet_and_upload_to_s3, -) -from moto import mock_aws +from src.transform_lambda.transform_lambda import read_from_s3_subfolder_to_df, list_existing_s3_files, bucket_name, process_to_parquet_and_upload_to_s3 import pytest import pandas as pd +from moto import mock_aws import os import boto3 from botocore.exceptions import ClientError import numpy as np +# /home/lianmei/northcoders/projects/de-project-bentley/src/transform_lambda/transform_lambda.py # import caplog import logging @@ -171,7 +167,7 @@ class TestBucketName: class TestProcessToParquetUploadS3: - def test_func_uploads_to_s3(self, mock_transform_bucket, s3_client): + def test_func_doesnt_upoad_if_file_exists(self, mock_transform_bucket, s3_client): expected_cars_df = pd.DataFrame( np.array( [ @@ -185,7 +181,27 @@ class TestProcessToParquetUploadS3: mock_dim_dict = {"car_data": expected_cars_df} response = process_to_parquet_and_upload_to_s3( - [], mock_dim_dict, {}, mock_transform_bucket, s3_client + ['car_data'], mock_dim_dict, {}, mock_transform_bucket, s3_client + ) + + assert response == {"uploaded": [], "not_uploaded": ['car_data']} + + def test_func_uploads_data_if_doesnt_exist(self, mock_transform_bucket, s3_client): + expected_flower_df = pd.DataFrame( + np.array( + [ + ["Daisy", "White", "Edible"], + ["Rose", "Red", "Yes"], + ["Daffodil", "Yellow", "No"], + ] + ), + columns=["Flower", "Colour", "Edible"], + ) + mock_dim_dict = {"flower_data": expected_flower_df} + + response = process_to_parquet_and_upload_to_s3( + ['car_data'], mock_dim_dict, {}, mock_transform_bucket, s3_client ) - assert response == {"uploaded": ["car_data"], "not_uploaded": []} + assert response == {"uploaded": ['flower_data'], "not_uploaded": ['car_data']} + # assert \ No newline at end of file -- cgit v1.2.3 From fadc54b7f72eca1eccbb9f4e7bb8ffca0960ebfa Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Wed, 28 Aug 2024 14:59:05 +0100 Subject: wip finished testing the process and upload parquet --- src/transform_lambda/transform_lambda.py | 6 ++-- tests/test_transform_lambda.py | 61 +++++++++++++++++++++++++++++--- 2 files changed, 60 insertions(+), 7 deletions(-) diff --git a/src/transform_lambda/transform_lambda.py b/src/transform_lambda/transform_lambda.py index 3dbb57b..478b257 100644 --- a/src/transform_lambda/transform_lambda.py +++ b/src/transform_lambda/transform_lambda.py @@ -5,12 +5,11 @@ import logging import pandas as pd import pyarrow as pa import pyarrow.parquet as pq -from dataframes import * +from src.transform_lambda.dataframes import * from botocore.exceptions import ClientError from pg8000.native import Connection, InterfaceError from datetime import datetime - class DBConnectionException(Exception): """Wraps pg8000.native Error or DatabaseError.""" @@ -54,6 +53,7 @@ def lambda_handler(event, context): bucket = bucket_name("transform") existing_s3_files = list_existing_s3_files(bucket) + # print(existing_s3_files) dict_of_df = read_from_s3_subfolder_to_df( TABLES, bucket_name("extract"), client=boto3.client("s3") @@ -120,11 +120,13 @@ def process_to_parquet_and_upload_to_s3( # changed parquet_file variable to the file name client.upload_file(f"{table_name}.parquet", bucket, f"{table_name}.parquet") status["uploaded"].append(table_name) + print(status) for table_name, df in mutable_df_dict.items(): s3_key = datetime.strftime( datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.parquet" ) + print(s3_key, '<<<< this is S3_Key') parquet_file = df.to_parquet( f"{table_name}.parquet", engine="pyarrow" ) # or fastparquet diff --git a/tests/test_transform_lambda.py b/tests/test_transform_lambda.py index 3d6e82a..0961301 100644 --- a/tests/test_transform_lambda.py +++ b/tests/test_transform_lambda.py @@ -167,7 +167,7 @@ class TestBucketName: class TestProcessToParquetUploadS3: - def test_func_doesnt_upoad_if_file_exists(self, mock_transform_bucket, s3_client): + def test_func_doesnt_upload_if_file_exists(self, mock_transform_bucket, s3_client): expected_cars_df = pd.DataFrame( np.array( [ @@ -181,9 +181,13 @@ class TestProcessToParquetUploadS3: mock_dim_dict = {"car_data": expected_cars_df} response = process_to_parquet_and_upload_to_s3( - ['car_data'], mock_dim_dict, {}, mock_transform_bucket, s3_client + ['car_data'], mock_dim_dict, {}, "dummy_transform_buc", s3_client ) + # keys = s3_client.get_object( + # Bucket='dummy_transform_buc', + # Key='car_data.parquet' + # ) assert response == {"uploaded": [], "not_uploaded": ['car_data']} def test_func_uploads_data_if_doesnt_exist(self, mock_transform_bucket, s3_client): @@ -199,9 +203,56 @@ class TestProcessToParquetUploadS3: ) mock_dim_dict = {"flower_data": expected_flower_df} + response = process_to_parquet_and_upload_to_s3( - ['car_data'], mock_dim_dict, {}, mock_transform_bucket, s3_client + ['car_data'], mock_dim_dict, {}, "dummy_transform_buc", s3_client + ) + + assert response == {"uploaded": ['flower_data'], "not_uploaded": []} + + def test_func_uploads_several_files_and_checks_for_parquet_files(self, mock_transform_bucket, s3_client): + expected_vegetable_df = pd.DataFrame( + np.array( + [ + ["Carrot", "Orange", "Edible"], + ["Broccoli", "Green", "Yes"], + ] + ), + columns=["Vegetable", "Colour", "Edible"], + ) + + expected_meat_df = pd.DataFrame( + np.array( + [ + ["Chicken", "White", "Yes"], + ["Beef", "Red", "No"], + ] + ), + columns=["Meat", "Colour", "Edible"], ) - assert response == {"uploaded": ['flower_data'], "not_uploaded": ['car_data']} - # assert \ No newline at end of file + mock_dim_dict = {"vegetable_data": expected_vegetable_df} + mock_fact_dict = {"meat_data": expected_meat_df} + + expected_vegetable_df.to_parquet("vegetable_data.parquet", engine="pyarrow") + s3_client.upload_file("vegetable_data.parquet", 'dummy_transform_buc', "vegetable_data.parquet") + + print(f"Type of mock_transform_bucket: {type(mock_transform_bucket)}") + print(f"Type of mock_dim_dict: {type(mock_dim_dict)}") + print(f"Type of items in mock_dim_dict: {[type(i) for i in mock_dim_dict.values()]}") + print(f"Type of s3_client: {type(s3_client)}") + + response = process_to_parquet_and_upload_to_s3( + ['vegetable_data'], mock_dim_dict, mock_fact_dict, "dummy_transform_buc", s3_client + ) + + assert response == {"uploaded": ['meat_data'], "not_uploaded": ['vegetable_data']} + + def test_func_handles_empty_dicts(self, mock_transform_bucket, s3_client): + response = process_to_parquet_and_upload_to_s3( + [], {}, {}, 'dummy_transform_buc', s3_client + ) + + assert response == {"uploaded": [], "not_uploaded": []} + +class TestLambdaHandler \ No newline at end of file -- cgit v1.2.3 From ec583ca56a6e25d5abcee2b6575890ad9f2e155c Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Wed, 28 Aug 2024 15:40:48 +0100 Subject: wip: completed and added to testprocesstoparquetuploads3 --- tests/test_transform_lambda.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/tests/test_transform_lambda.py b/tests/test_transform_lambda.py index 0961301..cf0723a 100644 --- a/tests/test_transform_lambda.py +++ b/tests/test_transform_lambda.py @@ -6,6 +6,7 @@ import os import boto3 from botocore.exceptions import ClientError import numpy as np +from datetime import datetime # /home/lianmei/northcoders/projects/de-project-bentley/src/transform_lambda/transform_lambda.py # import caplog @@ -184,10 +185,9 @@ class TestProcessToParquetUploadS3: ['car_data'], mock_dim_dict, {}, "dummy_transform_buc", s3_client ) - # keys = s3_client.get_object( - # Bucket='dummy_transform_buc', - # Key='car_data.parquet' - # ) + object_list = s3_client.list_objects_v2(Bucket='dummy_transform_buc') + s3_uploaded_files = [obj['Key'] for obj in object_list.get('Contents', [])] + assert 'car_data.parquet' not in s3_uploaded_files assert response == {"uploaded": [], "not_uploaded": ['car_data']} def test_func_uploads_data_if_doesnt_exist(self, mock_transform_bucket, s3_client): @@ -207,10 +207,14 @@ class TestProcessToParquetUploadS3: response = process_to_parquet_and_upload_to_s3( ['car_data'], mock_dim_dict, {}, "dummy_transform_buc", s3_client ) + object_list = s3_client.list_objects_v2(Bucket='dummy_transform_buc') + s3_uploaded_files = [obj['Key'] for obj in object_list.get('Contents', [])] + print(s3_uploaded_files, '<<<<<< the FILES IN DUMMY TRASN BUC') + assert "flower_data.parquet" in s3_uploaded_files assert response == {"uploaded": ['flower_data'], "not_uploaded": []} - def test_func_uploads_several_files_and_checks_for_parquet_files(self, mock_transform_bucket, s3_client): + def test_func_uploads_mutable_and_immutable_files(self, mock_transform_bucket, s3_client): expected_vegetable_df = pd.DataFrame( np.array( [ @@ -234,18 +238,20 @@ class TestProcessToParquetUploadS3: mock_dim_dict = {"vegetable_data": expected_vegetable_df} mock_fact_dict = {"meat_data": expected_meat_df} + ##mocked an existing file expected_vegetable_df.to_parquet("vegetable_data.parquet", engine="pyarrow") s3_client.upload_file("vegetable_data.parquet", 'dummy_transform_buc', "vegetable_data.parquet") - print(f"Type of mock_transform_bucket: {type(mock_transform_bucket)}") - print(f"Type of mock_dim_dict: {type(mock_dim_dict)}") - print(f"Type of items in mock_dim_dict: {[type(i) for i in mock_dim_dict.values()]}") - print(f"Type of s3_client: {type(s3_client)}") - + response = process_to_parquet_and_upload_to_s3( ['vegetable_data'], mock_dim_dict, mock_fact_dict, "dummy_transform_buc", s3_client ) + object_list = s3_client.list_objects_v2(Bucket='dummy_transform_buc') + s3_uploaded_files = [obj['Key'] for obj in object_list.get('Contents', [])] + time_prefix = datetime.strftime(datetime.today(), "meat_data/%Y/%m/%d/meat_data_%H:%M:%S.parquet") + assert any(key.startswith("meat_data/") and key.endswith(".parquet") for key in s3_uploaded_files) + assert 'vegetable_data.parquet' in s3_uploaded_files assert response == {"uploaded": ['meat_data'], "not_uploaded": ['vegetable_data']} def test_func_handles_empty_dicts(self, mock_transform_bucket, s3_client): @@ -255,4 +261,6 @@ class TestProcessToParquetUploadS3: assert response == {"uploaded": [], "not_uploaded": []} -class TestLambdaHandler \ No newline at end of file +class TestLambdaHandler: + def test_(self): + pass \ No newline at end of file -- cgit v1.2.3 From e0fee39bd2c72a40cfb065e0b0d93b2122d4b7f4 Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Wed, 28 Aug 2024 17:08:16 +0100 Subject: wip: testing lambda handler. Added Dict of Df patch fixture --- tests/test_transform_lambda.py | 66 ++++++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/tests/test_transform_lambda.py b/tests/test_transform_lambda.py index cf0723a..bc070fe 100644 --- a/tests/test_transform_lambda.py +++ b/tests/test_transform_lambda.py @@ -1,17 +1,15 @@ -from src.transform_lambda.transform_lambda import read_from_s3_subfolder_to_df, list_existing_s3_files, bucket_name, process_to_parquet_and_upload_to_s3 -import pytest -import pandas as pd -from moto import mock_aws +from datetime import datetime +import logging +import io import os +import numpy as np +from unittest.mock import patch, MagicMock import boto3 +import pandas as pd +from moto import mock_aws from botocore.exceptions import ClientError -import numpy as np -from datetime import datetime - -# /home/lianmei/northcoders/projects/de-project-bentley/src/transform_lambda/transform_lambda.py -# import caplog -import logging - +import pytest +from src.transform_lambda.transform_lambda import read_from_s3_subfolder_to_df, list_existing_s3_files, bucket_name, process_to_parquet_and_upload_to_s3, lambda_handler logger = logging.getLogger() logger.setLevel(logging.INFO) @@ -40,7 +38,6 @@ def mock_extract_bucket(s3_client): ) return mock_extract_bucket - @pytest.fixture(scope="class") def mock_transform_bucket(s3_client): mock_transform_bucket = s3_client.create_bucket( @@ -49,6 +46,30 @@ def mock_transform_bucket(s3_client): ) return mock_transform_bucket +@pytest.fixture(scope="function") +def mock_df_creation_functions(): + with patch('your_module.create_dim_counterparty') as mock_counterparty, \ + patch('your_module.create_dim_date') as mock_date, \ + patch('your_module.create_dim_location') as mock_location, \ + patch('your_module.create_dim_staff') as mock_staff, \ + patch('your_module.create_dim_design') as mock_design, \ + patch('your_module.create_fact_sales_order') as mock_sales, \ + patch('your_module.create_fact_purchase_orders') as mock_purchase, \ + patch('your_module.create_fact_payment') as mock_payment, \ + patch('your_module.create_dim_currency') as mock_currency: + + yield { + 'counterparty': mock_counterparty, + 'date': mock_date, + 'location': mock_location, + 'staff': mock_staff, + 'design': mock_design, + 'sales': mock_sales, + 'purchase': mock_purchase, + 'payment': mock_payment, + 'currency': mock_currency + } + class TestReadFromS3: # @pytest.mark.skip(reason="The test is broken!") @@ -113,7 +134,6 @@ class TestReadFromS3: ) assert list(result.keys()) == tables assert result["Foods"].eq(expected_foods_df, axis="columns").all(axis=None) - # assert result["Cars"].eq(expected_cars_df, axis="columns").all(axis=None) class TestListExistingFiles: @@ -209,7 +229,7 @@ class TestProcessToParquetUploadS3: ) object_list = s3_client.list_objects_v2(Bucket='dummy_transform_buc') s3_uploaded_files = [obj['Key'] for obj in object_list.get('Contents', [])] - print(s3_uploaded_files, '<<<<<< the FILES IN DUMMY TRASN BUC') + # print(s3_uploaded_files, '<<<<<< the FILES IN DUMMY TRASN BUC') assert "flower_data.parquet" in s3_uploaded_files assert response == {"uploaded": ['flower_data'], "not_uploaded": []} @@ -262,5 +282,19 @@ class TestProcessToParquetUploadS3: assert response == {"uploaded": [], "not_uploaded": []} class TestLambdaHandler: - def test_(self): - pass \ No newline at end of file + def test_func_reads_from_extract_bucket(self, s3_client, mock_extract_bucket, mock_transform_bucket): + mock_db = MagicMock() + mock_connect.return_value = mock_db + mock_csv = "id,name\n1,Lauryn\n2,Hill" + s3_client.put_object(Bucket='dummy_extract_buc', + Key="mock_table.csv", + Body=mock_csv) + + with patch('src.transform_lambda.transform_lambda.read_from_s3_subfolder_to_df') as mock_read: + mock_read.return_value = {'sample_table': pd.read_csv(io.StringIO(mock_csv))} + + lambda_handler({}, {}) + + mock_read.assert_called_once() + + -- cgit v1.2.3 From b25820e3284fff2f7ada52e321804dbb67c4a0ae Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Wed, 28 Aug 2024 17:43:16 +0100 Subject: wip: lambda handler tests not working - suspecting its something in dataframes --- tests/test_transform_lambda.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/tests/test_transform_lambda.py b/tests/test_transform_lambda.py index bc070fe..7e823f1 100644 --- a/tests/test_transform_lambda.py +++ b/tests/test_transform_lambda.py @@ -19,7 +19,7 @@ logger.setLevel(logging.INFO) def aws_credentials(): os.environ["AWS_ACCESS_KEY_ID"] = "testing" os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" - os.environ["AWS_SECURIT_TOKEN"] = "testing" + os.environ["AWS_SECURITY_TOKEN"] = "testing" os.environ["AWS_SESSION_TOKEN"] = "testing" os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" @@ -29,7 +29,6 @@ def s3_client(aws_credentials): with mock_aws(): yield boto3.client("s3") - @pytest.fixture(scope="class") def mock_extract_bucket(s3_client): mock_extract_bucket = s3_client.create_bucket( @@ -46,8 +45,15 @@ def mock_transform_bucket(s3_client): ) return mock_transform_bucket +@pytest.fixture +def mock_db_connection(): + with patch('src.transform_lambda.transform_lambda.connect_to_database') as mock_connect: + mock_db = MagicMock() + mock_connect.return_value = mock_db + yield mock_db + @pytest.fixture(scope="function") -def mock_df_creation_functions(): +def mock_df_functions(): with patch('your_module.create_dim_counterparty') as mock_counterparty, \ patch('your_module.create_dim_date') as mock_date, \ patch('your_module.create_dim_location') as mock_location, \ @@ -72,7 +78,6 @@ def mock_df_creation_functions(): class TestReadFromS3: - # @pytest.mark.skip(reason="The test is broken!") def test_returns_dictionary_with_correct_value_pair( self, s3_client, mock_extract_bucket ): @@ -100,7 +105,6 @@ class TestReadFromS3: assert isinstance(result["Foods"], pd.DataFrame) assert result["Foods"].eq(expected_df, axis="columns").all(axis=None) - # @pytest.mark.skip(reason="The test is broken!") def test_returns_dictionary_of_dataframes_for_multiple_tables( self, s3_client, mock_extract_bucket ): @@ -282,19 +286,22 @@ class TestProcessToParquetUploadS3: assert response == {"uploaded": [], "not_uploaded": []} class TestLambdaHandler: - def test_func_reads_from_extract_bucket(self, s3_client, mock_extract_bucket, mock_transform_bucket): - mock_db = MagicMock() - mock_connect.return_value = mock_db + def test_func_reads_from_extract_bucket(self, s3_client, mock_db_connection, mock_extract_bucket, mock_transform_bucket): mock_csv = "id,name\n1,Lauryn\n2,Hill" s3_client.put_object(Bucket='dummy_extract_buc', Key="mock_table.csv", Body=mock_csv) - with patch('src.transform_lambda.transform_lambda.read_from_s3_subfolder_to_df') as mock_read: - mock_read.return_value = {'sample_table': pd.read_csv(io.StringIO(mock_csv))} + with patch('src.transform_lambda.transform_lambda.read_from_s3_subfolder_to_df') as mock_read, \ + patch('src.transform_lambda.transform_lambda.bucket_name', return_value="dummy_extract_buc") as mock_bucket_name: + mock_read.return_value = {'sample_mock_table': pd.read_csv(io.StringIO(mock_csv))} + lambda_handler({}, {}) mock_read.assert_called_once() + + args, kwargs = mock_read.call_args + assert kwargs.get('bucket') == mock_bucket_name.return_value -- cgit v1.2.3 From 54aca43830f7c4e6962eabc2aace63256e81eb96 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 29 Aug 2024 11:12:43 +0100 Subject: infra(tf): update load function_name & lambda_function_arn --- terraform/events.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/events.tf b/terraform/events.tf index 53ae10a..7f8f641 100644 --- a/terraform/events.tf +++ b/terraform/events.tf @@ -86,7 +86,7 @@ resource "aws_s3_bucket_notification" "extract_bucket_notification" { resource "aws_lambda_permission" "allow_s3_transform_bucket" { statement_id = "AllowS3InvokeLambdaTransform${random_string.s3_transform_suffix.result}" action = "lambda:InvokeFunction" - function_name = aws_lambda_function.transform_lambda.function_name + function_name = aws_lambda_function.load_lambda.function_name principal = "s3.amazonaws.com" source_arn = aws_s3_bucket.transform_bucket.arn @@ -102,7 +102,7 @@ resource "aws_s3_bucket_notification" "transform_bucket_notification" { lambda_function { events = ["s3:ObjectCreated:*"] - lambda_function_arn = aws_lambda_function.transform_lambda.arn + lambda_function_arn = aws_lambda_function.load_lambda.arn } depends_on = [aws_lambda_permission.allow_s3_transform_bucket] -- cgit v1.2.3 From cd7a8f83dc20c045f607aba9ea0e83e0ef22b19e Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 29 Aug 2024 11:16:57 +0100 Subject: chore: remove car_data.parquet --- car_data.parquet | Bin 2827 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 car_data.parquet diff --git a/car_data.parquet b/car_data.parquet deleted file mode 100644 index 1853af6..0000000 Binary files a/car_data.parquet and /dev/null differ -- cgit v1.2.3 From e3f6e9b7477ac633d272d2fcacb13e42bb7eb572 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 29 Aug 2024 12:19:21 +0100 Subject: feat: separate layer zip generation for lambda functions --- scripts/make_layer_zip_01.sh | 8 ++++++++ scripts/make_layer_zip_02.sh | 9 +++++++++ 2 files changed, 17 insertions(+) create mode 100755 scripts/make_layer_zip_01.sh create mode 100755 scripts/make_layer_zip_02.sh diff --git a/scripts/make_layer_zip_01.sh b/scripts/make_layer_zip_01.sh new file mode 100755 index 0000000..145ea82 --- /dev/null +++ b/scripts/make_layer_zip_01.sh @@ -0,0 +1,8 @@ +# Description: Make the zip file for the layer for the extract lambda function + +cd "$(dirname "$0")/.." +mkdir -p python/lib/python3.11/site-packages +pip3 install --upgrade -r requirements.txt -t python/lib/python3.11/site-packages +rm layer.zip +zip -r layer.zip python +rm -r python/ diff --git a/scripts/make_layer_zip_02.sh b/scripts/make_layer_zip_02.sh new file mode 100755 index 0000000..53e9099 --- /dev/null +++ b/scripts/make_layer_zip_02.sh @@ -0,0 +1,9 @@ +# Description: Make the zip file for the layer for the transform & load +# lambda functions + +cd "$(dirname "$0")/.." +mkdir -p python/lib/python3.11/site-packages +pip3 install --upgrade -r requirements.txt -t python/lib/python3.11/site-packages +rm layer.zip +zip -r layer.zip python +rm -r python/ -- cgit v1.2.3 From 4838efc2cdff9966d4aa2042068be744e4ae1f84 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 29 Aug 2024 12:19:45 +0100 Subject: chore: rm old make_layer_zip.sh script --- scripts/make_layer_zip.sh | 8 -------- 1 file changed, 8 deletions(-) delete mode 100755 scripts/make_layer_zip.sh diff --git a/scripts/make_layer_zip.sh b/scripts/make_layer_zip.sh deleted file mode 100755 index eabe301..0000000 --- a/scripts/make_layer_zip.sh +++ /dev/null @@ -1,8 +0,0 @@ -# Description: Make the zip file for the layer - -cd "$(dirname "$0")/.." -mkdir -p python/lib/python3.11/site-packages -pip3 install --upgrade -r requirements.txt -t python/lib/python3.11/site-packages -rm layer.zip -zip -r layer.zip python -rm -r python/ -- cgit v1.2.3 From 107619511b8d1668707048d9caf38d4f6da96426 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 29 Aug 2024 12:22:24 +0100 Subject: refactor: separate layer zip prep --- scripts/make_layer_zip_01.sh | 10 +++++----- scripts/make_layer_zip_02.sh | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/scripts/make_layer_zip_01.sh b/scripts/make_layer_zip_01.sh index 145ea82..c8b4d2a 100755 --- a/scripts/make_layer_zip_01.sh +++ b/scripts/make_layer_zip_01.sh @@ -1,8 +1,8 @@ # Description: Make the zip file for the layer for the extract lambda function cd "$(dirname "$0")/.." -mkdir -p python/lib/python3.11/site-packages -pip3 install --upgrade -r requirements.txt -t python/lib/python3.11/site-packages -rm layer.zip -zip -r layer.zip python -rm -r python/ +mkdir -p python_01/lib/python3.11/site-packages +pip3 install --upgrade -r requirements_01.txt -t python_01/lib/python3.11/site-packages +rm layer_01.zip +zip -r layer_01.zip python_01 +rm -r python_01/ diff --git a/scripts/make_layer_zip_02.sh b/scripts/make_layer_zip_02.sh index 53e9099..8b5bb33 100755 --- a/scripts/make_layer_zip_02.sh +++ b/scripts/make_layer_zip_02.sh @@ -2,8 +2,8 @@ # lambda functions cd "$(dirname "$0")/.." -mkdir -p python/lib/python3.11/site-packages -pip3 install --upgrade -r requirements.txt -t python/lib/python3.11/site-packages -rm layer.zip -zip -r layer.zip python -rm -r python/ +mkdir -p python_02/lib/python3.11/site-packages +pip3 install --upgrade -r requirements_02.txt -t python_02/lib/python3.11/site-packages +rm layer_02.zip +zip -r layer_02.zip python_02 +rm -r python_02/ -- cgit v1.2.3 From e4e360630c90d7e801d99097b3e46e8299ab901d Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Thu, 29 Aug 2024 12:25:33 +0100 Subject: wip: lambdahandler tests fail - unexpected errors --- .gitignore | 5 ++++- tests/test_transform_lambda.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 6aa03fc..80f83ae 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,7 @@ __pycache__/ # OS-Related Files .DS_Store -venv \ No newline at end of file +venv + +##testing +*.parquet \ No newline at end of file diff --git a/tests/test_transform_lambda.py b/tests/test_transform_lambda.py index 7e823f1..73bd9b3 100644 --- a/tests/test_transform_lambda.py +++ b/tests/test_transform_lambda.py @@ -296,7 +296,7 @@ class TestLambdaHandler: patch('src.transform_lambda.transform_lambda.bucket_name', return_value="dummy_extract_buc") as mock_bucket_name: mock_read.return_value = {'sample_mock_table': pd.read_csv(io.StringIO(mock_csv))} - + lambda_handler({}, {}) mock_read.assert_called_once() -- cgit v1.2.3 From ecf330af27b4b32535de61fd3dd958bd5be6d278 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 29 Aug 2024 12:33:05 +0100 Subject: chore: add new requirements.txt files for extract/transform/lambda layers --- requirements_lambda_01.txt | 3 +++ requirements_lambda_02.txt | 9 +++++++++ 2 files changed, 12 insertions(+) create mode 100644 requirements_lambda_01.txt create mode 100644 requirements_lambda_02.txt diff --git a/requirements_lambda_01.txt b/requirements_lambda_01.txt new file mode 100644 index 0000000..10f56be --- /dev/null +++ b/requirements_lambda_01.txt @@ -0,0 +1,3 @@ +boto3 +botocore +pg8000 \ No newline at end of file diff --git a/requirements_lambda_02.txt b/requirements_lambda_02.txt new file mode 100644 index 0000000..20c88d7 --- /dev/null +++ b/requirements_lambda_02.txt @@ -0,0 +1,9 @@ +pandas +pyarrow +SQLAlchemy +auto_mix_prep +beautifulsoup4 +boto3 +botocore +pg8000 +Requests \ No newline at end of file -- cgit v1.2.3 From abcf3df1913507517f8e2401eb9a79033b755287 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 29 Aug 2024 12:34:21 +0100 Subject: chore: update requirements file names in bash scripts --- scripts/make_layer_zip_01.sh | 2 +- scripts/make_layer_zip_02.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/make_layer_zip_01.sh b/scripts/make_layer_zip_01.sh index c8b4d2a..c2d4534 100755 --- a/scripts/make_layer_zip_01.sh +++ b/scripts/make_layer_zip_01.sh @@ -2,7 +2,7 @@ cd "$(dirname "$0")/.." mkdir -p python_01/lib/python3.11/site-packages -pip3 install --upgrade -r requirements_01.txt -t python_01/lib/python3.11/site-packages +pip3 install --upgrade -r requirements_lambda_01.txt -t python_01/lib/python3.11/site-packages rm layer_01.zip zip -r layer_01.zip python_01 rm -r python_01/ diff --git a/scripts/make_layer_zip_02.sh b/scripts/make_layer_zip_02.sh index 8b5bb33..c788acf 100755 --- a/scripts/make_layer_zip_02.sh +++ b/scripts/make_layer_zip_02.sh @@ -3,7 +3,7 @@ cd "$(dirname "$0")/.." mkdir -p python_02/lib/python3.11/site-packages -pip3 install --upgrade -r requirements_02.txt -t python_02/lib/python3.11/site-packages +pip3 install --upgrade -r requirements_lambda_02.txt -t python_02/lib/python3.11/site-packages rm layer_02.zip zip -r layer_02.zip python_02 rm -r python_02/ -- cgit v1.2.3 From 62417b57133cc1c6df3efd89c247706a93e3f182 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 29 Aug 2024 17:46:33 +0100 Subject: infra(tf): restore make_layer_zip & add layer_01/layer_02 creation in same script --- scripts/make_layer_zip.sh | 17 +++++++++++++++++ scripts/make_layer_zip_01.sh | 8 -------- scripts/make_layer_zip_02.sh | 9 --------- 3 files changed, 17 insertions(+), 17 deletions(-) create mode 100755 scripts/make_layer_zip.sh delete mode 100755 scripts/make_layer_zip_01.sh delete mode 100755 scripts/make_layer_zip_02.sh diff --git a/scripts/make_layer_zip.sh b/scripts/make_layer_zip.sh new file mode 100755 index 0000000..7f64873 --- /dev/null +++ b/scripts/make_layer_zip.sh @@ -0,0 +1,17 @@ +# Description: Make the zip file for the layer for the extract lambda function + +cd "$(dirname "$0")/.." + +# Layer 01 +mkdir -p python/lib/python3.11/site-packages +pip3 install --upgrade -r requirements_lambda_01.txt -t python/lib/python3.11/site-packages +rm layer_01.zip +zip -r layer_01.zip python +rm -r python/ + +# Layer 02 +mkdir -p python/lib/python3.11/site-packages +pip3 install --upgrade -r requirements_lambda_02.txt -t python/lib/python3.11/site-packages +rm layer_02.zip +zip -r layer_02.zip python +rm -r python/ diff --git a/scripts/make_layer_zip_01.sh b/scripts/make_layer_zip_01.sh deleted file mode 100755 index c2d4534..0000000 --- a/scripts/make_layer_zip_01.sh +++ /dev/null @@ -1,8 +0,0 @@ -# Description: Make the zip file for the layer for the extract lambda function - -cd "$(dirname "$0")/.." -mkdir -p python_01/lib/python3.11/site-packages -pip3 install --upgrade -r requirements_lambda_01.txt -t python_01/lib/python3.11/site-packages -rm layer_01.zip -zip -r layer_01.zip python_01 -rm -r python_01/ diff --git a/scripts/make_layer_zip_02.sh b/scripts/make_layer_zip_02.sh deleted file mode 100755 index c788acf..0000000 --- a/scripts/make_layer_zip_02.sh +++ /dev/null @@ -1,9 +0,0 @@ -# Description: Make the zip file for the layer for the transform & load -# lambda functions - -cd "$(dirname "$0")/.." -mkdir -p python_02/lib/python3.11/site-packages -pip3 install --upgrade -r requirements_lambda_02.txt -t python_02/lib/python3.11/site-packages -rm layer_02.zip -zip -r layer_02.zip python_02 -rm -r python_02/ -- cgit v1.2.3 From 904c75c88b9eb2a5f28b5fc737d253f70196a17b Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 29 Aug 2024 17:47:25 +0100 Subject: infra(tf)/wip: update lambda layer creation scripting --- terraform/lambda.tf | 81 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 31 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index b6f36fb..1e12180 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -3,46 +3,65 @@ #################### locals { - layer_dir = "../" - layer_zip = "layer.zip" - layer_name = "lambda_layer" - script_dir = "../scripts" - layer_zip_path = "${local.layer_dir}/${local.layer_zip}" + layer_dir = "../" + layer_zip_01 = "layer_01.zip" + layer_zip_02 = "layer_02.zip" + layer_name_01 = "lambda_layer_01" + layer_name_02 = "lambda_layer_02" + script_dir = "../scripts" + layer_zip_01_path = "${local.layer_dir}${local.layer_zip_01}" + layer_zip_02_path = "${local.layer_dir}${local.layer_zip_02}" } -###################### -# Lambda Layer Setup # -###################### - resource "null_resource" "prepare_layer" { - - # New change: only run the script if the layer zip does not exist - + provisioner "local-exec" { + command = "bash ${local.script_dir}/make_layer_zip.sh" + } triggers = { - layer_zip_exists = fileexists(local.layer_zip_path) ? "exists" : "not_exists" + always_run = timestamp() } +} - provisioner "local-exec" { - command = "if [ ! -f ${local.layer_zip_path} ]; then bash ${local.script_dir}/make_layer_zip.sh; fi" - } +################################ +# Lambda Layer (Extract) Setup # +################################ +resource "aws_s3_object" "lambda_layer_zip_01" { + bucket = aws_s3_bucket.lambda_code_bucket.id #bucket instead of id + key = "${local.layer_name_01}/${local.layer_zip_01}" + source = "${local.layer_dir}${local.layer_zip_01}" + depends_on = [null_resource.prepare_layer] + etag = fileexists(local.layer_zip_01_path) ? filemd5(local.layer_zip_01_path) : null + force_destroy = true +} + +resource "aws_lambda_layer_version" "lambda_layer_01" { + layer_name = local.layer_name_01 + compatible_runtimes = ["python3.11"] + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.lambda_layer_zip_01.key + source_code_hash = fileexists(local.layer_zip_01_path) ? filebase64sha256(local.layer_zip_01_path) : null + depends_on = [aws_s3_object.lambda_layer_zip_01] } -resource "aws_s3_object" "lambda_layer_zip" { - bucket = aws_s3_bucket.lambda_code_bucket.id #bucket instead of id - key = "${local.layer_name}/${local.layer_zip}" - source = "${local.layer_dir}/${local.layer_zip}" - depends_on = [null_resource.prepare_layer] - etag = fileexists(local.layer_zip_path) ? filemd5(local.layer_zip_path) : null +######################################### +# Lambda Layer (Load & Transform) Setup # +######################################### +resource "aws_s3_object" "lambda_layer_zip_02" { + bucket = aws_s3_bucket.lambda_code_bucket.id #bucket instead of id + key = "${local.layer_name_02}/${local.layer_zip_02}" + source = "${local.layer_dir}${local.layer_zip_02}" + depends_on = [null_resource.prepare_layer] + etag = fileexists(local.layer_zip_02_path) ? filemd5(local.layer_zip_02_path) : null + force_destroy = true } -resource "aws_lambda_layer_version" "lambda_layer" { - layer_name = local.layer_name +resource "aws_lambda_layer_version" "lambda_layer_02" { + layer_name = local.layer_name_02 compatible_runtimes = ["python3.11"] s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = aws_s3_object.lambda_layer_zip.key - source_code_hash = fileexists(local.layer_zip_path) ? filebase64sha256(local.layer_zip_path) : null - skip_destroy = true - depends_on = [aws_s3_object.lambda_layer_zip] + s3_key = aws_s3_object.lambda_layer_zip_02.key + source_code_hash = fileexists(local.layer_zip_02_path) ? filebase64sha256(local.layer_zip_02_path) : null + depends_on = [aws_s3_object.lambda_layer_zip_02] } ########################### @@ -65,7 +84,7 @@ resource "aws_lambda_function" "extract_lambda" { function_name = var.extract_lambda_name s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket s3_key = aws_s3_object.extract_lambda_code.key - layers = [aws_lambda_layer_version.lambda_layer.arn] + layers = [aws_lambda_layer_version.lambda_layer_01.arn] role = aws_iam_role.multi_service_role.arn handler = "extract_lambda.lambda_handler" runtime = "python3.11" @@ -101,7 +120,7 @@ resource "aws_lambda_function" "transform_lambda" { function_name = var.transform_lambda_name s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket s3_key = aws_s3_object.transform_lambda_code.key - layers = [aws_lambda_layer_version.lambda_layer.arn] + layers = [aws_lambda_layer_version.lambda_layer_02.arn] role = aws_iam_role.multi_service_role.arn handler = "transform_lambda.lambda_handler" runtime = "python3.11" @@ -135,7 +154,7 @@ resource "aws_lambda_function" "load_lambda" { function_name = var.load_lambda_name s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket s3_key = aws_s3_object.load_lambda_code.key - layers = [aws_lambda_layer_version.lambda_layer.arn] + layers = [aws_lambda_layer_version.lambda_layer_02.arn] role = aws_iam_role.multi_service_role.arn handler = "load_lambda.lambda_handler" runtime = "python3.11" -- cgit v1.2.3