From 4f0d6f287ae83d7cdc0df6988ab7b9de10912f16 Mon Sep 17 00:00:00 2001 From: T-Aji Date: Wed, 14 Aug 2024 12:25:57 +0100 Subject: feat/passing tests to helper function list_existing_s3_files --- tests/dummy.txt | 1 + tests/test_extract_lambda.py | 49 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 tests/dummy.txt create mode 100644 tests/test_extract_lambda.py (limited to 'tests') diff --git a/tests/dummy.txt b/tests/dummy.txt new file mode 100644 index 0000000..af27ff4 --- /dev/null +++ b/tests/dummy.txt @@ -0,0 +1 @@ +This is a test file. \ No newline at end of file diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py new file mode 100644 index 0000000..472e93a --- /dev/null +++ b/tests/test_extract_lambda.py @@ -0,0 +1,49 @@ +import pytest +import boto3 +from moto import mock_aws +from src.extract_lambda import list_existing_s3_files #process_and_upload_tables +import os +import logging + + +@pytest.fixture(scope='class') +def aws_credentials(): + os.environ["AWS_ACCESS_KEY_ID"] = 'testing' + os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing' + os.environ["AWS_SECURIT_TOKEN"] = 'testing' + os.environ["AWS_SESSION_TOKEN"] = 'testing' + os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2' + +@pytest.fixture(scope='class') +def s3_client(aws_credentials): + with mock_aws(): + yield boto3.client('s3') + +class TestListExistings3Files(): + def test_error_if_no_bucket(self, s3_client, caplog): + + logger = logging.getLogger() + logger.info('Testing now.') + caplog.set_level(logging.ERROR) + list_existing_s3_files(client=s3_client) + assert 'Error listing S3 objects' in caplog.text + + def test_error_if_bucket_is_empty(self, s3_client, caplog): + + s3_client.create_bucket(Bucket='extract_bucket', + CreateBucketConfiguration={ + 'LocationConstraint': 'eu-west-2' + }) + list_existing_s3_files(client=s3_client) + assert 'The bucket is empty' in caplog.text + + def test_error_retrieving_object(self, s3_client, caplog): + s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt') + list_existing_s3_files(bucket_name='test_bucket', client=s3_client) + + assert 'Error retrieving S3 object ' in caplog.text + + def test_retrieves_file_content(self, s3_client, caplog): + result = list_existing_s3_files(client=s3_client) + + assert list(result.values()) == ['This is a test file.'] \ No newline at end of file -- cgit v1.2.3 From 45e025ac0c4ae8c721cb0b875fd0abd67cc2bc07 Mon Sep 17 00:00:00 2001 From: T-Aji Date: Wed, 14 Aug 2024 15:53:11 +0100 Subject: test: passing test for function connect_to_database --- src/extract_lambda.py | 40 +++++++++++++++++++++++++--------------- tests/test_extract_lambda.py | 40 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 62 insertions(+), 18 deletions(-) (limited to 'tests') diff --git a/src/extract_lambda.py b/src/extract_lambda.py index dc70590..6e94bba 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -1,6 +1,5 @@ from pg8000.native import Connection, DatabaseError, InterfaceError -from dotenv import load_dotenv -import os +from dotenv import dotenv_values import boto3 import csv from botocore.exceptions import ClientError @@ -9,16 +8,15 @@ import json logger = logging.getLogger() logger.setLevel(logging.INFO) -load_dotenv() - - -database = os.getenv('database') -user = os.getenv('user') -password = os.getenv('password') -host = os.getenv('host') -port = os.getenv('port') +class DBConnectionException(Exception): + """Wraps pg8000.native Error or DatabaseError.""" + def __init__(self, e): + """Initialise with provided error message.""" + self.message = str(e) + super().__init__(self.message) + def lambda_handler(event, context): """This lambda function connects to the Totesys database, lists the contents of the ingestion bucket, and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them @@ -53,8 +51,19 @@ def lambda_handler(event, context): if db: db.close() -def connect_to_database(): +def get_config(path: str = ".env") -> dict: + return dotenv_values(path) + + +def connect_to_database() -> Connection: try: + config = get_config() + host = config["host"] + port = config["port"] + user = config["user"] + password = config["password"] + database = config["database"] + return Connection( database=database, user=user, @@ -62,12 +71,13 @@ def connect_to_database(): host=host, port=port ) - except DatabaseError as e: - logger.error(f'Database error: {e}') - raise + # except DatabaseError as e: + # logger.error(f'Database error: {e}') + # raise except InterfaceError as i: logger.error(f'Interface error: {i}') - raise + raise DBConnectionException("Failed to connect to database") + def list_existing_s3_files(bucket_name='extract_bucket', client=boto3.client('s3')): diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index 472e93a..18c49fc 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -1,10 +1,24 @@ import pytest import boto3 from moto import mock_aws -from src.extract_lambda import list_existing_s3_files #process_and_upload_tables +from unittest.mock import patch +from unittest import TestCase +from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException #process_and_upload_tables import os import logging +@pytest.fixture(scope='class') +def mock_config(): + env_vars = { + "host": "abc", + "port": "5432", + "user": "def", + "password": "password", + "database": "db", + } + with patch("src.extract_lambda.get_config", return_value=env_vars) as mock_config: + yield mock_config + @pytest.fixture(scope='class') def aws_credentials(): @@ -19,7 +33,7 @@ def s3_client(aws_credentials): with mock_aws(): yield boto3.client('s3') -class TestListExistings3Files(): +class TestListExistings3Files: def test_error_if_no_bucket(self, s3_client, caplog): logger = logging.getLogger() @@ -46,4 +60,24 @@ class TestListExistings3Files(): def test_retrieves_file_content(self, s3_client, caplog): result = list_existing_s3_files(client=s3_client) - assert list(result.values()) == ['This is a test file.'] \ No newline at end of file + assert list(result.values()) == ['This is a test file.'] + +class TestConnectToDatabase: + def test_connect_to_database(mock_conn, mock_config): + with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: + connect_to_database() + mock_conn.assert_called_with( + host="abc", user="def", port="5432", password="password", database="db" + ) + + def test_database_error(self, mock_config): + with pytest.raises(DBConnectionException): + connect_to_database() + + def test_logs_interface_error(self, caplog): + logger = logging.getLogger() + logger.info('Testing now.') + caplog.set_level(logging.ERROR) + with pytest.raises(DBConnectionException): + connect_to_database() + assert 'Interface error' in caplog.text \ No newline at end of file -- cgit v1.2.3 From 848a86b7f3b9c5ce16cd774d19e3fa62ca8ffc68 Mon Sep 17 00:00:00 2001 From: T-Aji Date: Wed, 14 Aug 2024 18:14:01 +0100 Subject: test: mid-through test for process_and_upload_tables --- src/extract_lambda.py | 16 +++++++--------- tests/test_extract_lambda.py | 35 ++++++++++++++++++++++++++++++++--- 2 files changed, 39 insertions(+), 12 deletions(-) (limited to 'tests') diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 6e94bba..a70ecdd 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -5,6 +5,7 @@ import csv from botocore.exceptions import ClientError import logging import json +from datetime import datetime logger = logging.getLogger() logger.setLevel(logging.INFO) @@ -16,7 +17,7 @@ class DBConnectionException(Exception): """Initialise with provided error message.""" self.message = str(e) super().__init__(self.message) - + def lambda_handler(event, context): """This lambda function connects to the Totesys database, lists the contents of the ingestion bucket, and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them @@ -71,9 +72,6 @@ def connect_to_database() -> Connection: host=host, port=port ) - # except DatabaseError as e: - # logger.error(f'Database error: {e}') - # raise except InterfaceError as i: logger.error(f'Interface error: {i}') raise DBConnectionException("Failed to connect to database") @@ -110,14 +108,14 @@ def list_existing_s3_files(bucket_name='extract_bucket', client=boto3.client('s3 -def process_and_upload_tables(db, existing_files): +def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): """Creates a list of the tables from a database query and then selects everything from each table in individual queries it then writes each table to CSV files and compares with the item - in the existing_files dictionary with the same name. If it finds sny changes + in the existing_files dictionary with the same name. If it finds any changes to files, or new tables/files it uploads them to the s3 bucket """ - client = boto3.client('s3') + tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';") for table in tables: @@ -132,13 +130,13 @@ def process_and_upload_tables(db, existing_files): writer.writerow(column_names) writer.writerows(rows) - s3_key = f"{table_name}/latest.csv" + s3_key = f"{table_name}/{datetime.today().year}/{datetime.today().month}/{datetime.today().day}/{table_name}_{datetime.now().strftime('%H:%M:%S')}.csv" new_csv_content = open(csv_file_path, "r").read() if s3_key not in existing_files or existing_files[s3_key] != new_csv_content: try: - client.upload_file(csv_file_path, ingestion_bucket, s3_key) + client.upload_file(csv_file_path, 'extract_bucket', s3_key) logger.info(f"Uploaded {s3_key} to S3.") except ClientError as e: logger.error(f'Error uploading to S3: {e}') \ No newline at end of file diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index 18c49fc..74d7e2c 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -3,7 +3,7 @@ import boto3 from moto import mock_aws from unittest.mock import patch from unittest import TestCase -from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException #process_and_upload_tables +from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables import os import logging @@ -33,7 +33,7 @@ def s3_client(aws_credentials): with mock_aws(): yield boto3.client('s3') -class TestListExistings3Files: +class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): logger = logging.getLogger() @@ -80,4 +80,33 @@ class TestConnectToDatabase: caplog.set_level(logging.ERROR) with pytest.raises(DBConnectionException): connect_to_database() - assert 'Interface error' in caplog.text \ No newline at end of file + assert 'Interface error' in caplog.text + +class TestProcessAndUploadTables: + def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog, mocker): + logger = logging.getLogger() + logger.info('Testing now.') + caplog.set_level(logging.ERROR) + + with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: + mock_db = connect_to_database() + # need to add a table + s3_key = 'dummy/2024/8/14/dummy_16:46:30.txt' + mock_existing_files = mocker.Mock(return_value={s3_key: 'This is a test file.' }) + s3_client.create_bucket(Bucket='extract_bucket', + CreateBucketConfiguration={ + 'LocationConstraint': 'eu-west-2' + }) + s3_client.upload_file('tests/dummy.txt', 'extract_bucket', s3_key) + process_and_upload_tables(mock_db, mock_existing_files, client=s3_client) + + assert 'Error uploading to S3' in caplog.text + +#@pytest.mark.describe("Helpers") +# @pytest.mark.it("Query processor returns correctly formatted dict") +# def test_process_query(): +# with patch("src.api.helpers.get_db_connection") as mock_conn: +# mock_conn().run.side_effect = db_data +# mock_conn().columns = sample_headers +# result = process_query("test query") +# assert result == sample_result \ No newline at end of file -- cgit v1.2.3 From c9bf342c8f6038a3f5397bfc8c53d251f27e7eec Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Thu, 15 Aug 2024 16:45:47 +0100 Subject: procefss_and_upload_tables test in progress --- requirements.txt | 30 ++++++++++++++++++++++++++++ src/extract_lambda.py | 30 +++++++++++++++++++--------- tests/dummy_identical.csv | 4 ++++ tests/test_extract_lambda.py | 47 +++++++++++++++++++++----------------------- 4 files changed, 77 insertions(+), 34 deletions(-) create mode 100644 requirements.txt create mode 100644 tests/dummy_identical.csv (limited to 'tests') diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..6f383f9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,30 @@ +asn1crypto==1.5.1 +boto3==1.34.159 +botocore==1.34.159 +certifi==2024.7.4 +cffi==1.17.0 +charset-normalizer==3.3.2 +cryptography==43.0.0 +idna==3.7 +iniconfig==2.0.0 +Jinja2==3.1.4 +jmespath==1.0.1 +MarkupSafe==2.1.5 +moto==5.0.12 +packaging==24.1 +pg8000==1.31.2 +pluggy==1.5.0 +pycparser==2.22 +pytest==8.3.2 +pytest-mock==3.14.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +PyYAML==6.0.2 +requests==2.32.3 +responses==0.25.3 +s3transfer==0.10.2 +scramp==1.4.5 +six==1.16.0 +urllib3==2.2.2 +Werkzeug==3.0.3 +xmltodict==0.13.0 \ No newline at end of file diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 56b47a6..fb2d7e8 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -6,6 +6,7 @@ from botocore.exceptions import ClientError import logging import json from datetime import datetime +import re logger = logging.getLogger() @@ -117,9 +118,16 @@ def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): in the existing_files dictionary with the same name. If it finds any changes to files, or new tables/files it uploads them to the s3 bucket """ - + ## NEW CODE + all_datetimes = [] + for file_names in existing_files.keys(): + datetime_str_on_s3 = ''.join(re.search(r'\/(.+/).+_(.+)\.csv',file_names).group(1,2)) + all_datetimes.append(datetime.strptime(datetime_str_on_s3, '%Y/%m/%d/%H:%M:%S')) + latest_timestamp = max(all_datetimes) + ## END OF NEW CODE + tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';") - + print(tables) for table in tables: table_name = table[0] rows = db.run(f"SELECT * FROM {table_name};") @@ -128,17 +136,21 @@ def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): csv_file_path = f"/tmp/{table_name}.csv" with open(csv_file_path, "w", newline='') as file: writer = csv.writer(file) - column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] + #column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] + column_names = [col_name[0] for col_name in db.run(f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';")] writer.writerow(column_names) writer.writerows(rows) - - s3_key = f"{table_name}/{datetime.today().year}/{datetime.today().month}/{datetime.today().day}/{table_name}_{datetime.now().strftime('%H:%M:%S')}.csv" + s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') new_csv_content = open(csv_file_path, "r").read() - - - if s3_key not in existing_files or existing_files[s3_key] != new_csv_content: + ## NEW CODE + latest_s3_object_key = datetime.strftime(latest_timestamp,f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') + ## END OF NEW CODE + if existing_files[latest_s3_object_key] != new_csv_content: try: client.upload_file(csv_file_path, 'extract_bucket', s3_key) logger.info(f"Uploaded {s3_key} to S3.") except ClientError as e: - logger.error(f'Error uploading to S3: {e}') \ No newline at end of file + logger.error(f'Error uploading to S3: {e}') + else: + logger.info(f"No new data.") + \ No newline at end of file diff --git a/tests/dummy_identical.csv b/tests/dummy_identical.csv new file mode 100644 index 0000000..fdd8993 --- /dev/null +++ b/tests/dummy_identical.csv @@ -0,0 +1,4 @@ +Food_type,Flavour,Colour +Vegetable,Sour,Green +Berry,Sweet,Red + diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index 74d7e2c..e94a8a4 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -1,7 +1,7 @@ import pytest import boto3 from moto import mock_aws -from unittest.mock import patch +from unittest.mock import patch, MagicMock from unittest import TestCase from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables import os @@ -81,32 +81,29 @@ class TestConnectToDatabase: with pytest.raises(DBConnectionException): connect_to_database() assert 'Interface error' in caplog.text - +''' class TestProcessAndUploadTables: - def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog, mocker): + def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog): logger = logging.getLogger() logger.info('Testing now.') caplog.set_level(logging.ERROR) - - with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: - mock_db = connect_to_database() - # need to add a table - s3_key = 'dummy/2024/8/14/dummy_16:46:30.txt' - mock_existing_files = mocker.Mock(return_value={s3_key: 'This is a test file.' }) + #### + queries = ["SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';", + "SELECT * FROM Fruits;", + "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits'"] + return_values = [[['Fruits']], + [['Vegetable','Sour','Green'],['Berry','Sweet','Red']], + [['Food_type'],['Flavour'],['Colour']]] + vals = dict(zip(queries,return_values)) + + #### + with patch('src.extract_lambda.connect_to_database') as mock_db: + mock_db().run.side_effects = return_values + s3_key = 'Fruits/2024/08/15/Fruits_16:46:30.csv' + existing_files = {s3_key: 'Food_type,Flavour,Colour\nFruit,Sour,Green\nBerry,Sweet,Red'} s3_client.create_bucket(Bucket='extract_bucket', - CreateBucketConfiguration={ - 'LocationConstraint': 'eu-west-2' - }) - s3_client.upload_file('tests/dummy.txt', 'extract_bucket', s3_key) - process_and_upload_tables(mock_db, mock_existing_files, client=s3_client) - - assert 'Error uploading to S3' in caplog.text - -#@pytest.mark.describe("Helpers") -# @pytest.mark.it("Query processor returns correctly formatted dict") -# def test_process_query(): -# with patch("src.api.helpers.get_db_connection") as mock_conn: -# mock_conn().run.side_effect = db_data -# mock_conn().columns = sample_headers -# result = process_query("test query") -# assert result == sample_result \ No newline at end of file + CreateBucketConfiguration={'LocationConstraint': 'eu-west-2'}) + s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key) + process_and_upload_tables(mock_db(), existing_files, client=s3_client) + assert 'No new data.' in caplog.text +''' \ No newline at end of file -- cgit v1.2.3