From a104e4a04c914eed6b2a9c52107cdbf7f164d106 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 18 Feb 2025 17:02:38 +0000 Subject: add subheading comments for test sections in test_csv_reader.py --- test/test_csv_reader.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'test') diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index af13cff..271adae 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -5,6 +5,8 @@ from obfuscator.csv_reader import CSVReader reader = CSVReader() +# CSVREADER: READ_STRING TESTS + # Check if the function can read a CSV string with no content and return # an empty list @@ -61,3 +63,5 @@ def test_csv_with_quoted_fields_should_run_as_expected(): {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, ] assert result == expected + +# CSVREADER: READ_S3 TESTS \ No newline at end of file -- cgit v1.2.3 From 4a2b7bbae7c95ade8bcb13ae1ea270469b685f0f Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 18 Feb 2025 17:05:19 +0000 Subject: add & import boto3 and moto to pyproject.toml/test_csv_reader.py --- pyproject.toml | 3 ++- test/test_csv_reader.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/pyproject.toml b/pyproject.toml index cc923b9..46114bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "A Python library designed to detect and remove Personally Identif authors = [{ name = "Alex Schofield", email = "git@ajschof.me" }] readme = "README.md" requires-python = ">=3.13" -dependencies = ["tabulate (>=0.9.0,<0.10.0)"] +dependencies = ["tabulate (>=0.9.0,<0.10.0)", "boto3 (>=1.36.22,<2.0.0)"] [tool.poetry] package-mode = false @@ -13,6 +13,7 @@ package-mode = false [tool.poetry.group.dev.dependencies] pytest = "8.3.4" pytest-cov = "^6.0.0" +moto = "^5.0.28" [build-system] requires = ["poetry-core>=2.0.0,<3.0.0"] diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 271adae..f59a5a7 100644 --- 
a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -1,6 +1,8 @@ # csv_reader.py - tests # Author: Alex Schofield +import boto3 +from moto import mock_s3 from obfuscator.csv_reader import CSVReader reader = CSVReader() -- cgit v1.2.3 From 81f7d60e1a20f2d504d810fb44b01c79bd6d55a0 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 18 Feb 2025 19:44:44 +0000 Subject: add first test for read_s3 method in CSVReader --- test/test_csv_reader.py | 45 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index f59a5a7..27abea0 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -2,8 +2,9 @@ # Author: Alex Schofield import boto3 -from moto import mock_s3 +from moto import mock_aws from obfuscator.csv_reader import CSVReader +import pytest reader = CSVReader() @@ -66,4 +67,44 @@ def test_csv_with_quoted_fields_should_run_as_expected(): ] assert result == expected -# CSVREADER: READ_S3 TESTS \ No newline at end of file + +# CSVREADER: READ_S3 TESTS + + +def setup_s3(s3_client, bucket: str, key: str, content: str): + s3_client.create_bucket( + Bucket="test-bucket", + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) + s3_client.put_object(Bucket=bucket, Key=key, Body=content) + + +@pytest.fixture(autouse=True) +def s3_client(): + with mock_aws(): + yield boto3.client("s3", "eu-west-2") + + +def test_read_s3_valid_csv_returns_expected(): + with mock_aws(): + s3 = boto3.client("s3", region_name="eu-west-2") + bucket = "test-bucket" + key = "data/mock.csv" + + csv_content = ( + "student_id,name,course\n" + "1234,Student 1,Course 1\n" + "5678,Student 2,Course 2\n" + ) + + setup_s3(s3, bucket, key, csv_content) + path = f"s3://{bucket}/{key}" + + data = reader.read_s3(path) + + expected = [ + {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, + {"student_id": "5678", "name": "Student 2", "course": 
"Course 2"}, + ] + + assert data == expected -- cgit v1.2.3 From ebb884f21106a574d1ab5b61e3d2a34080f0b9a3 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 18 Feb 2025 23:00:19 +0000 Subject: add tests for read_s3 in CSVReader --- test/test_csv_reader.py | 96 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) (limited to 'test') diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 27abea0..540e172 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -73,7 +73,7 @@ def test_csv_with_quoted_fields_should_run_as_expected(): def setup_s3(s3_client, bucket: str, key: str, content: str): s3_client.create_bucket( - Bucket="test-bucket", + Bucket=bucket, CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, ) s3_client.put_object(Bucket=bucket, Key=key, Body=content) @@ -108,3 +108,97 @@ def test_read_s3_valid_csv_returns_expected(): ] assert data == expected + + +def test_read_s3_empty_csv_returns_empty_list(): + with mock_aws(): + s3 = boto3.client("s3", region_name="eu-west-2") + bucket = "empty-bucket" + key = "data/empty.csv" + csv_content = "student_id,name,course\n" + setup_s3(s3, bucket, key, csv_content) + path = f"s3://{bucket}/{key}" + + data = CSVReader.read_s3(path) + assert data == [] + + +def test_read_s3_nonexistent_bucket_raises_exception(): + with mock_aws(): + bucket = "nonexistent-bucket" + key = "data/mock.csv" + path = f"s3://{bucket}/{key}" + with pytest.raises(Exception): + CSVReader.read_s3(path) + + +def test_read_s3_nonexistent_key_raises_exception(): + with mock_aws(): + s3 = boto3.client("s3", region_name="eu-west-2") + bucket = "test-bucket" + s3.create_bucket( + Bucket=bucket, + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) + key = "data/nonexistent.csv" + path = f"s3://{bucket}/{key}" + with pytest.raises(Exception): + CSVReader.read_s3(path) + + +def test_read_s3_malformed_csv_returns_expected(): + with mock_aws(): + s3 = 
boto3.client("s3", region_name="eu-west-2") + bucket = "test-bucket" + key = "data/malformed.csv" + csv_content = "1234,Student 1,Course 1\n" "5678,Student 2,Course 2\n" + setup_s3(s3, bucket, key, csv_content) + path = f"s3://{bucket}/{key}" + + data = CSVReader.read_s3(path) + expected = [{"1234": "5678", "Student 1": "Student 2", "Course 1": "Course 2"}] + assert data == expected + + +def test_read_s3_csv_with_extra_empty_lines(): + with mock_aws(): + s3 = boto3.client("s3", region_name="eu-west-2") + bucket = "test-bucket" + key = "data/extra_lines.csv" + csv_content = ( + "student_id,name,course\n" + "1234,Student 1,Course 1\n" + "\n" + "5678,Student 2,Course 2\n" + "\n" + ) + setup_s3(s3, bucket, key, csv_content) + path = f"s3://{bucket}/{key}" + + data = CSVReader.read_s3(path) + expected = [ + {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, + {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, + ] + assert data == expected + + +def test_read_s3_csv_with_whitespace_in_fields(): + with mock_aws(): + s3 = boto3.client("s3", region_name="eu-west-2") + bucket = "test-bucket" + key = "data/whitespace.csv" + csv_content = ( + "student_id, name , course \n" + " 1234 , Student 1 , Course 1 \n" + "5678,Student 2,Course 2\n" + ) + setup_s3(s3, bucket, key, csv_content) + path = f"s3://{bucket}/{key}" + + data = CSVReader.read_s3(path) + expected = [ + {"student_id": " 1234 ", " name ": " Student 1 ", " course ": " Course 1 "}, + {"student_id": "5678", " name ": "Student 2", " course ": "Course 2"}, + ] + assert data == expected -- cgit v1.2.3 From 20572634aaab2b522128a88449b2a32b6c028fc4 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 18 Feb 2025 23:05:33 +0000 Subject: update test_csv_reader.py header --- test/test_csv_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test') diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 540e172..e4c135b 100644 --- a/test/test_csv_reader.py 
+++ b/test/test_csv_reader.py @@ -1,4 +1,4 @@ -# csv_reader.py - tests +# csv_reader.py - tests for read_string and read_s3 # Author: Alex Schofield import boto3 -- cgit v1.2.3