about | summary | refs | log | tree | commit | diff | stats
path: root/test
diff options
context:
space:
mode:
author: Alex <git@ajschof.me> 2025-02-19 15:58:28 +0000
committer: GitHub <noreply@github.com> 2025-02-19 15:58:28 +0000
commit: 4066bf747e1e4c938526957c119f3f1485ee251e (patch)
tree: 9a1e95f4ccbdd04e19d67a6c13641a19c4d4f3e0 /test
parent: f24955044c4c05e37aba4efb505ec63b44113912 (diff)
parent: 5402af2c7198a685a57a05e29a869e1e72a6b877 (diff)
download: gdpr-obfuscator-4066bf747e1e4c938526957c119f3f1485ee251e.tar.gz
download: gdpr-obfuscator-4066bf747e1e4c938526957c119f3f1485ee251e.zip
Merge pull request #8 from ajschofield/refining-phase
mostly minor changes (fixing things up)
Diffstat (limited to 'test')
-rw-r--r-- test/test_obfuscator.py | 15
-rw-r--r-- test/test_read.py (renamed from test/test_csv_reader.py) | 39
-rw-r--r-- test/test_write.py (renamed from test/test_csv_writer.py) | 15
3 files changed, 17 insertions, 52 deletions
diff --git a/test/test_obfuscator.py b/test/test_obfuscator.py
index 4f61b16..c77b6b4 100644
--- a/test/test_obfuscator.py
+++ b/test/test_obfuscator.py
@@ -1,8 +1,5 @@
from obfuscator.obfuscate import obfuscate
-# Check if the function does what its supposed to and can obfuscate
-# valid PII fields in a list of dictionaries
-
def test_obfuscate_data_with_valid_pii_fields():
data = [
@@ -39,11 +36,6 @@ def test_obfuscate_data_with_valid_pii_fields():
assert result == expected
-# Check if the function can obfuscate data even when some PII
-# fields are missing from some of the data, returning a list of dictionaries
-# but with the missing PII fields obfuscated and the rest of the data intact
-
-
def test_obfuscate_data_with_missing_pii_field():
data = [
{"student_id": "1234", "name": "John Smith", "course": "Software"},
@@ -69,9 +61,6 @@ def test_obfuscate_data_with_missing_pii_field():
assert result == expected
-# Check if the function can handle an empty list of data, returning an empty list
-
-
def test_obfuscate_data_with_no_data():
data = []
pii_fields = ["name", "email_address"]
@@ -81,10 +70,6 @@ def test_obfuscate_data_with_no_data():
assert result == expected
-# Check if the function can handle an empty list of PII fields, returning the data as is
-# without mutating it
-
-
def test_obfuscate_data_with_empty_pii_fields():
data = [
{
diff --git a/test/test_csv_reader.py b/test/test_read.py
index e4c135b..de425ce 100644
--- a/test/test_csv_reader.py
+++ b/test/test_read.py
@@ -1,17 +1,9 @@
-# csv_reader.py - tests for read_string and read_s3
-# Author: Alex Schofield
-
import boto3
from moto import mock_aws
-from obfuscator.csv_reader import CSVReader
+from obfuscator.read import DataReader
import pytest
-reader = CSVReader()
-
-# CSVREADER: READ_STRING TESTS
-
-# Check if the function can read a CSV string with no content and return
-# an empty list
+reader = DataReader(log_level="DEBUG")
def test_empty_csv_should_return_no_content():
@@ -21,10 +13,6 @@ def test_empty_csv_should_return_no_content():
assert result == expected
-# Check if the function can read a CSV string with only a header and return
-# an empty list
-
-
def test_csv_with_header_only_should_return_no_content():
content = "student_id,name,course\n"
result = reader.read_string(content)
@@ -32,10 +20,6 @@ def test_csv_with_header_only_should_return_no_content():
assert result == expected
-# Check if the function can read a CSV string with valid data and return
-# a list of dictionaries
-
-
def test_csv_with_valid_data():
content = (
"student_id,name,course\n"
@@ -50,10 +34,6 @@ def test_csv_with_valid_data():
assert result == expected
-# Check if the function can read a CSV string with quoted fields and return
-# a list of dictionaries with the quoted fields intact
-
-
def test_csv_with_quoted_fields_should_run_as_expected():
content = (
"student_id,name,course\n"
@@ -68,9 +48,6 @@ def test_csv_with_quoted_fields_should_run_as_expected():
assert result == expected
-# CSVREADER: READ_S3 TESTS
-
-
def setup_s3(s3_client, bucket: str, key: str, content: str):
s3_client.create_bucket(
Bucket=bucket,
@@ -119,7 +96,7 @@ def test_read_s3_empty_csv_returns_empty_list():
setup_s3(s3, bucket, key, csv_content)
path = f"s3://{bucket}/{key}"
- data = CSVReader.read_s3(path)
+ data = reader.read_s3(path)
assert data == []
@@ -129,7 +106,7 @@ def test_read_s3_nonexistent_bucket_raises_exception():
key = "data/mock.csv"
path = f"s3://{bucket}/{key}"
with pytest.raises(Exception):
- CSVReader.read_s3(path)
+ reader.read_s3(path)
def test_read_s3_nonexistent_key_raises_exception():
@@ -143,7 +120,7 @@ def test_read_s3_nonexistent_key_raises_exception():
key = "data/nonexistent.csv"
path = f"s3://{bucket}/{key}"
with pytest.raises(Exception):
- CSVReader.read_s3(path)
+ reader.read_s3(path)
def test_read_s3_malformed_csv_returns_expected():
@@ -155,7 +132,7 @@ def test_read_s3_malformed_csv_returns_expected():
setup_s3(s3, bucket, key, csv_content)
path = f"s3://{bucket}/{key}"
- data = CSVReader.read_s3(path)
+ data = reader.read_s3(path)
expected = [{"1234": "5678", "Student 1": "Student 2", "Course 1": "Course 2"}]
assert data == expected
@@ -175,7 +152,7 @@ def test_read_s3_csv_with_extra_empty_lines():
setup_s3(s3, bucket, key, csv_content)
path = f"s3://{bucket}/{key}"
- data = CSVReader.read_s3(path)
+ data = reader.read_s3(path)
expected = [
{"student_id": "1234", "name": "Student 1", "course": "Course 1"},
{"student_id": "5678", "name": "Student 2", "course": "Course 2"},
@@ -196,7 +173,7 @@ def test_read_s3_csv_with_whitespace_in_fields():
setup_s3(s3, bucket, key, csv_content)
path = f"s3://{bucket}/{key}"
- data = CSVReader.read_s3(path)
+ data = reader.read_s3(path)
expected = [
{"student_id": " 1234 ", " name ": " Student 1 ", " course ": " Course 1 "},
{"student_id": "5678", " name ": "Student 2", " course ": "Course 2"},
diff --git a/test/test_csv_writer.py b/test/test_write.py
index eceac28..4929b06 100644
--- a/test/test_csv_writer.py
+++ b/test/test_write.py
@@ -1,6 +1,8 @@
import io
import csv
-from obfuscator.csv_writer import create_byte_stream
+from obfuscator.write import DataWriter
+
+writer = DataWriter()
def csv_bytes_to_list(csv_bytes: bytes):
@@ -15,13 +17,14 @@ def test_create_byte_stream_valid_data():
{"student_id": "1234", "name": "Student 1", "course": "Course 1"},
{"student_id": "5678", "name": "Student 2", "course": "Course 2"},
]
- csv_bytes = create_byte_stream(data)
+ csv_bytes = writer.create_byte_stream(data)
result = csv_bytes_to_list(csv_bytes)
assert result == data
def test_create_byte_stream_empty_data():
- csv_bytes = create_byte_stream([])
+ data = []
+ csv_bytes = writer.create_byte_stream(data)
assert csv_bytes == b""
@@ -30,7 +33,7 @@ def test_create_byte_stream_handles_quoted_fields():
{"student_id": "1234", "name": 'Student "One"', "course": "Course, A"},
{"student_id": "5678", "name": 'Student "Two"', "course": "Course, B"},
]
- csv_bytes = create_byte_stream(data)
+ csv_bytes = writer.create_byte_stream(data)
result = csv_bytes_to_list(csv_bytes)
assert result == data
@@ -40,7 +43,7 @@ def test_create_byte_stream_consistent_header_order():
{"student_id": "1234", "name": "Alice", "course": "Math"},
{"student_id": "5678", "name": "Bob", "course": "Science"},
]
- csv_bytes = create_byte_stream(data)
+ csv_bytes = writer.create_byte_stream(data)
csv_string = csv_bytes.decode("utf-8")
header_line = csv_string.splitlines()[0]
expected_header = ",".join(data[0].keys())
@@ -52,6 +55,6 @@ def test_create_byte_stream_special_characters():
{"student_id": "1234", "name": "Student 1", "course": "Line1\nLine2"},
{"student_id": "5678", "name": "Student 2", "course": "Value with, comma"},
]
- csv_bytes = create_byte_stream(data)
+ csv_bytes = writer.create_byte_stream(data)
result = csv_bytes_to_list(csv_bytes)
assert result == data
git.ajschof.me — hosted by ajschofield — powered by cgit