diff options
Diffstat (limited to 'test/test_csv_reader.py')
| -rw-r--r-- | test/test_csv_reader.py | 181 |
1 files changed, 0 insertions, 181 deletions
diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py deleted file mode 100644 index d6129e7..0000000 --- a/test/test_csv_reader.py +++ /dev/null @@ -1,181 +0,0 @@ -import boto3 -from moto import mock_aws -from obfuscator.csv_reader import CSVReader -import pytest - -reader = CSVReader(log_level="DEBUG") - - -def test_empty_csv_should_return_no_content(): - content = "" - result = reader.read_string(content) - expected = [] - assert result == expected - - -def test_csv_with_header_only_should_return_no_content(): - content = "student_id,name,course\n" - result = reader.read_string(content) - expected = [] - assert result == expected - - -def test_csv_with_valid_data(): - content = ( - "student_id,name,course\n" - "1234,Student 1,Course 1\n" - "5678,Student 2,Course 2\n" - ) - result = reader.read_string(content) - expected = [ - {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, - {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, - ] - assert result == expected - - -def test_csv_with_quoted_fields_should_run_as_expected(): - content = ( - "student_id,name,course\n" - '1234,"Student 1","Course 1"\n' - '5678,"Student 2","Course 2"\n' - ) - result = reader.read_string(content) - expected = [ - {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, - {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, - ] - assert result == expected - - -def setup_s3(s3_client, bucket: str, key: str, content: str): - s3_client.create_bucket( - Bucket=bucket, - CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, - ) - s3_client.put_object(Bucket=bucket, Key=key, Body=content) - - -@pytest.fixture(autouse=True) -def s3_client(): - with mock_aws(): - yield boto3.client("s3", "eu-west-2") - - -def test_read_s3_valid_csv_returns_expected(): - with mock_aws(): - s3 = boto3.client("s3", region_name="eu-west-2") - bucket = "test-bucket" - key = "data/mock.csv" - - csv_content = ( - "student_id,name,course\n" - "1234,Student 1,Course 1\n" - "5678,Student 2,Course 2\n" - ) - - setup_s3(s3, bucket, key, csv_content) - path = f"s3://{bucket}/{key}" - - data = reader.read_s3(path) - - expected = [ - {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, - {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, - ] - - assert data == expected - - -def test_read_s3_empty_csv_returns_empty_list(): - with mock_aws(): - s3 = boto3.client("s3", region_name="eu-west-2") - bucket = "empty-bucket" - key = "data/empty.csv" - csv_content = "student_id,name,course\n" - setup_s3(s3, bucket, key, csv_content) - path = f"s3://{bucket}/{key}" - - data = reader.read_s3(path) - assert data == [] - - -def test_read_s3_nonexistent_bucket_raises_exception(): - with mock_aws(): - bucket = "nonexistent-bucket" - key = "data/mock.csv" - path = f"s3://{bucket}/{key}" - with pytest.raises(Exception): - reader.read_s3(path) - - -def test_read_s3_nonexistent_key_raises_exception(): - with mock_aws(): - s3 = boto3.client("s3", region_name="eu-west-2") - bucket = "test-bucket" - s3.create_bucket( - Bucket=bucket, - CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, - ) - key = "data/nonexistent.csv" - path = f"s3://{bucket}/{key}" - with pytest.raises(Exception): - reader.read_s3(path) - - -def test_read_s3_malformed_csv_returns_expected(): - with mock_aws(): - s3 = boto3.client("s3", region_name="eu-west-2") - bucket = "test-bucket" - key = "data/malformed.csv" - csv_content = "1234,Student 1,Course 1\n" "5678,Student 2,Course 2\n" - setup_s3(s3, bucket, key, csv_content) - path = f"s3://{bucket}/{key}" - - data = reader.read_s3(path) - expected = [{"1234": "5678", "Student 1": "Student 2", "Course 1": "Course 2"}] - assert data == expected - - -def test_read_s3_csv_with_extra_empty_lines(): - with mock_aws(): - s3 = boto3.client("s3", region_name="eu-west-2") - bucket = "test-bucket" - key = "data/extra_lines.csv" - csv_content = ( - "student_id,name,course\n" - "1234,Student 1,Course 1\n" - "\n" - "5678,Student 2,Course 2\n" - "\n" - ) - setup_s3(s3, bucket, key, csv_content) - path = f"s3://{bucket}/{key}" - - data = reader.read_s3(path) - expected = [ - {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, - {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, - ] - assert data == expected - - -def test_read_s3_csv_with_whitespace_in_fields(): - with mock_aws(): - s3 = boto3.client("s3", region_name="eu-west-2") - bucket = "test-bucket" - key = "data/whitespace.csv" - csv_content = ( - "student_id, name , course \n" - " 1234 , Student 1 , Course 1 \n" - "5678,Student 2,Course 2\n" - ) - setup_s3(s3, bucket, key, csv_content) - path = f"s3://{bucket}/{key}" - - data = reader.read_s3(path) - expected = [ - {"student_id": " 1234 ", " name ": " Student 1 ", " course ": " Course 1 "}, - {"student_id": "5678", " name ": "Student 2", " course ": "Course 2"}, - ] - assert data == expected |
