Merge pull request #8 from ajschofield/refining-phase

mostly minor changes (fixing things up)
author: Alex <git@ajschof.me> 2025-02-19 15:58:28 +0000
committer: GitHub <noreply@github.com> 2025-02-19 15:58:28 +0000
commit: 4066bf747e1e4c938526957c119f3f1485ee251e (patch)
tree: 9a1e95f4ccbdd04e19d67a6c13641a19c4d4f3e0 /test
parent: f24955044c4c05e37aba4efb505ec63b44113912 (diff)
parent: 5402af2c7198a685a57a05e29a869e1e72a6b877 (diff)
download: gdpr-obfuscator-4066bf747e1e4c938526957c119f3f1485ee251e.tar.gz gdpr-obfuscator-4066bf747e1e4c938526957c119f3f1485ee251e.zip
3 files changed, 17 insertions, 52 deletions
diff --git a/test/test_obfuscator.py b/test/test_obfuscator.py
index 4f61b16..c77b6b4 100644
--- a/test/test_obfuscator.py
+++ b/test/test_obfuscator.py
@@ -1,8 +1,5 @@
 from obfuscator.obfuscate import obfuscate
 
-# Check if the function does what its supposed to and can obfuscate
-# valid PII fields in a list of dictionaries
-
 
 def test_obfuscate_data_with_valid_pii_fields():
     data = [
@@ -39,11 +36,6 @@ def test_obfuscate_data_with_valid_pii_fields():
     assert result == expected
 
 
-# Check if the function can obfuscate data even when some PII
-# fields are missing from some of the data, returning a list of dictionaries
-# but with the missing PII fields obfuscated and the rest of the data intact
-
-
 def test_obfuscate_data_with_missing_pii_field():
     data = [
         {"student_id": "1234", "name": "John Smith", "course": "Software"},
@@ -69,9 +61,6 @@ def test_obfuscate_data_with_missing_pii_field():
     assert result == expected
 
 
-# Check if the function can handle an empty list of data, returning an empty list
-
-
 def test_obfuscate_data_with_no_data():
     data = []
     pii_fields = ["name", "email_address"]
@@ -81,10 +70,6 @@ def test_obfuscate_data_with_no_data():
     assert result == expected
 
 
-# Check if the function can handle an empty list of PII fields, returning the data as is
-# without mutating it
-
-
 def test_obfuscate_data_with_empty_pii_fields():
     data = [
         {
diff --git a/test/test_csv_reader.py b/test/test_read.py
index e4c135b..de425ce 100644
--- a/test/test_csv_reader.py
+++ b/test/test_read.py
@@ -1,17 +1,9 @@
-# csv_reader.py - tests for read_string and read_s3
-# Author: Alex Schofield
-
 import boto3
 from moto import mock_aws
-from obfuscator.csv_reader import CSVReader
+from obfuscator.read import DataReader
 import pytest
 
-reader = CSVReader()
-
-# CSVREADER: READ_STRING TESTS
-
-# Check if the function can read a CSV string with no content and return
-# an empty list
+reader = DataReader(log_level="DEBUG")
 
 
 def test_empty_csv_should_return_no_content():
@@ -21,10 +13,6 @@ def test_empty_csv_should_return_no_content():
     assert result == expected
 
 
-# Check if the function can read a CSV string with only a header and return
-# an empty list
-
-
 def test_csv_with_header_only_should_return_no_content():
     content = "student_id,name,course\n"
     result = reader.read_string(content)
@@ -32,10 +20,6 @@ def test_csv_with_header_only_should_return_no_content():
     assert result == expected
 
 
-# Check if the function can read a CSV string with valid data and return
-# a list of dictionaries
-
-
 def test_csv_with_valid_data():
     content = (
         "student_id,name,course\n"
@@ -50,10 +34,6 @@ def test_csv_with_valid_data():
     assert result == expected
 
 
-# Check if the function can read a CSV string with quoted fields and return
-# a list of dictionaries with the quoted fields intact
-
-
 def test_csv_with_quoted_fields_should_run_as_expected():
     content = (
         "student_id,name,course\n"
@@ -68,9 +48,6 @@ def test_csv_with_quoted_fields_should_run_as_expected():
     assert result == expected
 
 
-# CSVREADER: READ_S3 TESTS
-
-
 def setup_s3(s3_client, bucket: str, key: str, content: str):
     s3_client.create_bucket(
         Bucket=bucket,
@@ -119,7 +96,7 @@ def test_read_s3_empty_csv_returns_empty_list():
         setup_s3(s3, bucket, key, csv_content)
         path = f"s3://{bucket}/{key}"
 
-        data = CSVReader.read_s3(path)
+        data = reader.read_s3(path)
         assert data == []
 
 
@@ -129,7 +106,7 @@ def test_read_s3_nonexistent_bucket_raises_exception():
         key = "data/mock.csv"
         path = f"s3://{bucket}/{key}"
         with pytest.raises(Exception):
-            CSVReader.read_s3(path)
+            reader.read_s3(path)
 
 
 def test_read_s3_nonexistent_key_raises_exception():
@@ -143,7 +120,7 @@ def test_read_s3_nonexistent_key_raises_exception():
         key = "data/nonexistent.csv"
         path = f"s3://{bucket}/{key}"
         with pytest.raises(Exception):
-            CSVReader.read_s3(path)
+            reader.read_s3(path)
 
 
 def test_read_s3_malformed_csv_returns_expected():
@@ -155,7 +132,7 @@ def test_read_s3_malformed_csv_returns_expected():
         setup_s3(s3, bucket, key, csv_content)
         path = f"s3://{bucket}/{key}"
 
-        data = CSVReader.read_s3(path)
+        data = reader.read_s3(path)
         expected = [{"1234": "5678", "Student 1": "Student 2", "Course 1": "Course 2"}]
         assert data == expected
 
@@ -175,7 +152,7 @@ def test_read_s3_csv_with_extra_empty_lines():
         setup_s3(s3, bucket, key, csv_content)
         path = f"s3://{bucket}/{key}"
 
-        data = CSVReader.read_s3(path)
+        data = reader.read_s3(path)
         expected = [
             {"student_id": "1234", "name": "Student 1", "course": "Course 1"},
             {"student_id": "5678", "name": "Student 2", "course": "Course 2"},
@@ -196,7 +173,7 @@ def test_read_s3_csv_with_whitespace_in_fields():
         setup_s3(s3, bucket, key, csv_content)
         path = f"s3://{bucket}/{key}"
 
-        data = CSVReader.read_s3(path)
+        data = reader.read_s3(path)
         expected = [
             {"student_id": " 1234 ", " name ": " Student 1 ", " course ": " Course 1 "},
             {"student_id": "5678", " name ": "Student 2", " course ": "Course 2"},
diff --git a/test/test_csv_writer.py b/test/test_write.py
index eceac28..4929b06 100644
--- a/test/test_csv_writer.py
+++ b/test/test_write.py
@@ -1,6 +1,8 @@
 import io
 import csv
-from obfuscator.csv_writer import create_byte_stream
+from obfuscator.write import DataWriter
+
+writer = DataWriter()
 
 
 def csv_bytes_to_list(csv_bytes: bytes):
@@ -15,13 +17,14 @@ def test_create_byte_stream_valid_data():
         {"student_id": "1234", "name": "Student 1", "course": "Course 1"},
         {"student_id": "5678", "name": "Student 2", "course": "Course 2"},
     ]
-    csv_bytes = create_byte_stream(data)
+    csv_bytes = writer.create_byte_stream(data)
     result = csv_bytes_to_list(csv_bytes)
     assert result == data
 
 
 def test_create_byte_stream_empty_data():
-    csv_bytes = create_byte_stream([])
+    data = []
+    csv_bytes = writer.create_byte_stream(data)
     assert csv_bytes == b""
 
 
@@ -30,7 +33,7 @@ def test_create_byte_stream_handles_quoted_fields():
         {"student_id": "1234", "name": 'Student "One"', "course": "Course, A"},
         {"student_id": "5678", "name": 'Student "Two"', "course": "Course, B"},
     ]
-    csv_bytes = create_byte_stream(data)
+    csv_bytes = writer.create_byte_stream(data)
     result = csv_bytes_to_list(csv_bytes)
     assert result == data
 
@@ -40,7 +43,7 @@ def test_create_byte_stream_consistent_header_order():
         {"student_id": "1234", "name": "Alice", "course": "Math"},
         {"student_id": "5678", "name": "Bob", "course": "Science"},
     ]
-    csv_bytes = create_byte_stream(data)
+    csv_bytes = writer.create_byte_stream(data)
     csv_string = csv_bytes.decode("utf-8")
     header_line = csv_string.splitlines()[0]
     expected_header = ",".join(data[0].keys())
@@ -52,6 +55,6 @@ def test_create_byte_stream_special_characters():
         {"student_id": "1234", "name": "Student 1", "course": "Line1\nLine2"},
         {"student_id": "5678", "name": "Student 2", "course": "Value with, comma"},
     ]
-    csv_bytes = create_byte_stream(data)
+    csv_bytes = writer.create_byte_stream(data)
     result = csv_bytes_to_list(csv_bytes)
     assert result == data
author	Alex <git@ajschof.me>	2025-02-19 15:58:28 +0000
committer	GitHub <noreply@github.com>	2025-02-19 15:58:28 +0000
commit	4066bf747e1e4c938526957c119f3f1485ee251e (patch)
tree	9a1e95f4ccbdd04e19d67a6c13641a19c4d4f3e0 /test
parent	f24955044c4c05e37aba4efb505ec63b44113912 (diff)
parent	5402af2c7198a685a57a05e29a869e1e72a6b877 (diff)
download	gdpr-obfuscator-4066bf747e1e4c938526957c119f3f1485ee251e.tar.gz gdpr-obfuscator-4066bf747e1e4c938526957c119f3f1485ee251e.zip