From ae89b05dbc8feebc1410f39143c0d829f8704235 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 19 Feb 2025 15:37:16 +0000 Subject: rename CSVReader to DataReader & update references --- test/test_read.py | 181 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 test/test_read.py (limited to 'test/test_read.py') diff --git a/test/test_read.py b/test/test_read.py new file mode 100644 index 0000000..903ab5d --- /dev/null +++ b/test/test_read.py @@ -0,0 +1,181 @@ +import boto3 +from moto import mock_aws +from obfuscator.read import CSVReader +import pytest + +reader = CSVReader(log_level="DEBUG") + + +def test_empty_csv_should_return_no_content(): + content = "" + result = reader.read_string(content) + expected = [] + assert result == expected + + +def test_csv_with_header_only_should_return_no_content(): + content = "student_id,name,course\n" + result = reader.read_string(content) + expected = [] + assert result == expected + + +def test_csv_with_valid_data(): + content = ( + "student_id,name,course\n" + "1234,Student 1,Course 1\n" + "5678,Student 2,Course 2\n" + ) + result = reader.read_string(content) + expected = [ + {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, + {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, + ] + assert result == expected + + +def test_csv_with_quoted_fields_should_run_as_expected(): + content = ( + "student_id,name,course\n" + '1234,"Student 1","Course 1"\n' + '5678,"Student 2","Course 2"\n' + ) + result = reader.read_string(content) + expected = [ + {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, + {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, + ] + assert result == expected + + +def setup_s3(s3_client, bucket: str, key: str, content: str): + s3_client.create_bucket( + Bucket=bucket, + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) + s3_client.put_object(Bucket=bucket, Key=key, Body=content) + + +@pytest.fixture(autouse=True) +def s3_client(): + with mock_aws(): + yield boto3.client("s3", "eu-west-2") + + +def test_read_s3_valid_csv_returns_expected(): + with mock_aws(): + s3 = boto3.client("s3", region_name="eu-west-2") + bucket = "test-bucket" + key = "data/mock.csv" + + csv_content = ( + "student_id,name,course\n" + "1234,Student 1,Course 1\n" + "5678,Student 2,Course 2\n" + ) + + setup_s3(s3, bucket, key, csv_content) + path = f"s3://{bucket}/{key}" + + data = reader.read_s3(path) + + expected = [ + {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, + {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, + ] + + assert data == expected + + +def test_read_s3_empty_csv_returns_empty_list(): + with mock_aws(): + s3 = boto3.client("s3", region_name="eu-west-2") + bucket = "empty-bucket" + key = "data/empty.csv" + csv_content = "student_id,name,course\n" + setup_s3(s3, bucket, key, csv_content) + path = f"s3://{bucket}/{key}" + + data = reader.read_s3(path) + assert data == [] + + +def test_read_s3_nonexistent_bucket_raises_exception(): + with mock_aws(): + bucket = "nonexistent-bucket" + key = "data/mock.csv" + path = f"s3://{bucket}/{key}" + with pytest.raises(Exception): + reader.read_s3(path) + + +def test_read_s3_nonexistent_key_raises_exception(): + with mock_aws(): + s3 = boto3.client("s3", region_name="eu-west-2") + bucket = "test-bucket" + s3.create_bucket( + Bucket=bucket, + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) + key = "data/nonexistent.csv" + path = f"s3://{bucket}/{key}" + with pytest.raises(Exception): + reader.read_s3(path) + + +def test_read_s3_malformed_csv_returns_expected(): + with mock_aws(): + s3 = boto3.client("s3", region_name="eu-west-2") + bucket = "test-bucket" + key = "data/malformed.csv" + csv_content = "1234,Student 1,Course 1\n" "5678,Student 2,Course 2\n" + setup_s3(s3, bucket, key, csv_content) + path = f"s3://{bucket}/{key}" + + data = reader.read_s3(path) + expected = [{"1234": "5678", "Student 1": "Student 2", "Course 1": "Course 2"}] + assert data == expected + + +def test_read_s3_csv_with_extra_empty_lines(): + with mock_aws(): + s3 = boto3.client("s3", region_name="eu-west-2") + bucket = "test-bucket" + key = "data/extra_lines.csv" + csv_content = ( + "student_id,name,course\n" + "1234,Student 1,Course 1\n" + "\n" + "5678,Student 2,Course 2\n" + "\n" + ) + setup_s3(s3, bucket, key, csv_content) + path = f"s3://{bucket}/{key}" + + data = reader.read_s3(path) + expected = [ + {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, + {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, + ] + assert data == expected + + +def test_read_s3_csv_with_whitespace_in_fields(): + with mock_aws(): + s3 = boto3.client("s3", region_name="eu-west-2") + bucket = "test-bucket" + key = "data/whitespace.csv" + csv_content = ( + "student_id, name , course \n" + " 1234 , Student 1 , Course 1 \n" + "5678,Student 2,Course 2\n" + ) + setup_s3(s3, bucket, key, csv_content) + path = f"s3://{bucket}/{key}" + + data = reader.read_s3(path) + expected = [ + {"student_id": " 1234 ", " name ": " Student 1 ", " course ": " Course 1 "}, + {"student_id": "5678", " name ": "Student 2", " course ": "Course 2"}, + ] + assert data == expected -- cgit v1.2.3