about summary refs log tree commit diff stats
path: root/test/test_csv_reader.py
diff options
context:
space:
mode:
author Alex Schofield <git@ajschof.me> 2025-02-19 15:37:16 +0000
committer Alex Schofield <git@ajschof.me> 2025-02-19 15:37:16 +0000
commit ae89b05dbc8feebc1410f39143c0d829f8704235 (patch)
tree 17b42dfdd3059c29ed19a2fb5ff8376175f3c367 /test/test_csv_reader.py
parent 7e3553e936958a7fc80c7d4ebae4adeb0f634851 (diff)
download gdpr-obfuscator-ae89b05dbc8feebc1410f39143c0d829f8704235.tar.gz
gdpr-obfuscator-ae89b05dbc8feebc1410f39143c0d829f8704235.zip
rename CSVReader to DataReader & update references
Diffstat (limited to 'test/test_csv_reader.py')
-rw-r--r-- test/test_csv_reader.py 181
1 files changed, 0 insertions, 181 deletions
diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py
deleted file mode 100644
index d6129e7..0000000
--- a/test/test_csv_reader.py
+++ /dev/null
@@ -1,181 +0,0 @@
-import boto3
-from moto import mock_aws
-from obfuscator.csv_reader import CSVReader
-import pytest
-
-# Single CSVReader instance shared by every test in this module, built with
-# log_level="DEBUG".
-# NOTE(review): it is constructed at import time, before any mock_aws()
-# context is entered — confirm CSVReader does not create its boto3 client in
-# __init__, otherwise that client may not be covered by the mock.
-reader = CSVReader(log_level="DEBUG")
-
-
-def test_empty_csv_should_return_no_content():
- """read_string on an empty string is expected to return an empty list."""
- content = ""
- result = reader.read_string(content)
- expected = []
- assert result == expected
-
-
-def test_csv_with_header_only_should_return_no_content():
- """read_string on a header row with no data rows is expected to return []."""
- content = "student_id,name,course\n"
- result = reader.read_string(content)
- expected = []
- assert result == expected
-
-
-def test_csv_with_valid_data():
- """read_string should return one dict per data row, keyed by the header names.
-
- Every value in the expected dicts is a string, including the numeric-looking
- student_id.
- """
- content = (
- "student_id,name,course\n"
- "1234,Student 1,Course 1\n"
- "5678,Student 2,Course 2\n"
- )
- result = reader.read_string(content)
- expected = [
- {"student_id": "1234", "name": "Student 1", "course": "Course 1"},
- {"student_id": "5678", "name": "Student 2", "course": "Course 2"},
- ]
- assert result == expected
-
-
-def test_csv_with_quoted_fields_should_run_as_expected():
- """Double-quoted fields should come back without their surrounding quotes."""
- content = (
- "student_id,name,course\n"
- '1234,"Student 1","Course 1"\n'
- '5678,"Student 2","Course 2"\n'
- )
- result = reader.read_string(content)
- # Same expectation as the unquoted case: the quotes are not part of the value.
- expected = [
- {"student_id": "1234", "name": "Student 1", "course": "Course 1"},
- {"student_id": "5678", "name": "Student 2", "course": "Course 2"},
- ]
- assert result == expected
-
-
-def setup_s3(s3_client, bucket: str, key: str, content: str):
- """Create ``bucket`` in eu-west-2 and store ``content`` in it under ``key``.
-
- Helper for the S3 tests in this module. ``s3_client`` must provide
- ``create_bucket`` and ``put_object``; the tests pass a boto3 S3 client.
- """
- s3_client.create_bucket(
- Bucket=bucket,
- CreateBucketConfiguration={"LocationConstraint": "eu-west-2"},
- )
- s3_client.put_object(Bucket=bucket, Key=key, Body=content)
-
-
-@pytest.fixture(autouse=True)
-def s3_client():
- """Autouse fixture: wrap every test in this module in a mock_aws() context.
-
- Yields a boto3 S3 client for eu-west-2 that was created inside the mock.
- NOTE(review): none of the tests shown here request the yielded client; the
- S3 tests open their own mock_aws() block and build their own client.
- """
- with mock_aws():
- yield boto3.client("s3", "eu-west-2")
-
-
-def test_read_s3_valid_csv_returns_expected():
- """read_s3 on an s3:// path to a well-formed CSV should return one dict per row."""
- with mock_aws():
- s3 = boto3.client("s3", region_name="eu-west-2")
- bucket = "test-bucket"
- key = "data/mock.csv"
-
- csv_content = (
- "student_id,name,course\n"
- "1234,Student 1,Course 1\n"
- "5678,Student 2,Course 2\n"
- )
-
- # Upload the fixture object, then address it as s3://<bucket>/<key>.
- setup_s3(s3, bucket, key, csv_content)
- path = f"s3://{bucket}/{key}"
-
- data = reader.read_s3(path)
-
- expected = [
- {"student_id": "1234", "name": "Student 1", "course": "Course 1"},
- {"student_id": "5678", "name": "Student 2", "course": "Course 2"},
- ]
-
- assert data == expected
-
-
-def test_read_s3_empty_csv_returns_empty_list():
- """A header-only CSV object read through read_s3 should produce an empty list."""
- with mock_aws():
- s3 = boto3.client("s3", region_name="eu-west-2")
- bucket = "empty-bucket"
- key = "data/empty.csv"
- # Header row only; there are no data rows to return.
- csv_content = "student_id,name,course\n"
- setup_s3(s3, bucket, key, csv_content)
- path = f"s3://{bucket}/{key}"
-
- data = reader.read_s3(path)
- assert data == []
-
-
-def test_read_s3_nonexistent_bucket_raises_exception():
- """read_s3 should raise when the bucket named in the path was never created.
-
- NOTE(review): pytest.raises(Exception) accepts any exception type; consider
- narrowing it once the type raised by read_s3 is confirmed.
- """
- with mock_aws():
- # No create_bucket call here, so the bucket does not exist in the mock.
- bucket = "nonexistent-bucket"
- key = "data/mock.csv"
- path = f"s3://{bucket}/{key}"
- with pytest.raises(Exception):
- reader.read_s3(path)
-
-
-def test_read_s3_nonexistent_key_raises_exception():
- """read_s3 should raise when the bucket exists but the key was never uploaded.
-
- NOTE(review): pytest.raises(Exception) accepts any exception type; consider
- narrowing it once the type raised by read_s3 is confirmed.
- """
- with mock_aws():
- s3 = boto3.client("s3", region_name="eu-west-2")
- bucket = "test-bucket"
- # Create the bucket only; no object is put, so the key below is missing.
- s3.create_bucket(
- Bucket=bucket,
- CreateBucketConfiguration={"LocationConstraint": "eu-west-2"},
- )
- key = "data/nonexistent.csv"
- path = f"s3://{bucket}/{key}"
- with pytest.raises(Exception):
- reader.read_s3(path)
-
-
-def test_read_s3_malformed_csv_returns_expected():
- """Pin read_s3 behaviour for a CSV that has no header row.
-
- The expectation is that the first line is used as the header, so its values
- become the keys of the single dict built from the second line.
- """
- with mock_aws():
- s3 = boto3.client("s3", region_name="eu-west-2")
- bucket = "test-bucket"
- key = "data/malformed.csv"
- # Two adjacent string literals; Python joins them into one two-line string.
- csv_content = "1234,Student 1,Course 1\n" "5678,Student 2,Course 2\n"
- setup_s3(s3, bucket, key, csv_content)
- path = f"s3://{bucket}/{key}"
-
- data = reader.read_s3(path)
- expected = [{"1234": "5678", "Student 1": "Student 2", "Course 1": "Course 2"}]
- assert data == expected
-
-
-def test_read_s3_csv_with_extra_empty_lines():
- """Blank lines between and after data rows should not produce extra records."""
- with mock_aws():
- s3 = boto3.client("s3", region_name="eu-west-2")
- bucket = "test-bucket"
- key = "data/extra_lines.csv"
- # One blank line between the two rows and one more after the last row.
- csv_content = (
- "student_id,name,course\n"
- "1234,Student 1,Course 1\n"
- "\n"
- "5678,Student 2,Course 2\n"
- "\n"
- )
- setup_s3(s3, bucket, key, csv_content)
- path = f"s3://{bucket}/{key}"
-
- data = reader.read_s3(path)
- expected = [
- {"student_id": "1234", "name": "Student 1", "course": "Course 1"},
- {"student_id": "5678", "name": "Student 2", "course": "Course 2"},
- ]
- assert data == expected
-
-
-def test_read_s3_csv_with_whitespace_in_fields():
- """Whitespace around header names and field values should be kept verbatim.
-
- The expected dicts keep the padded keys (" name ", " course ") and the padded
- values of the first row, i.e. nothing is stripped.
- """
- with mock_aws():
- s3 = boto3.client("s3", region_name="eu-west-2")
- bucket = "test-bucket"
- key = "data/whitespace.csv"
- # Header and first data row are padded with spaces; second row is not.
- csv_content = (
- "student_id, name , course \n"
- " 1234 , Student 1 , Course 1 \n"
- "5678,Student 2,Course 2\n"
- )
- setup_s3(s3, bucket, key, csv_content)
- path = f"s3://{bucket}/{key}"
-
- data = reader.read_s3(path)
- expected = [
- {"student_id": " 1234 ", " name ": " Student 1 ", " course ": " Course 1 "},
- {"student_id": "5678", " name ": "Student 2", " course ": "Course 2"},
- ]
- assert data == expected
git.ajschof.me — hosted by ajschofield — powered by cgit