about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Alex <git@ajschof.me> 2025-02-18 23:34:53 +0000
committer: GitHub <noreply@github.com> 2025-02-18 23:34:53 +0000
commit: f24955044c4c05e37aba4efb505ec63b44113912 (patch)
tree: e2096c282bdcab9b7c92e1ed6511f65fdd5caead
parent: eb0d30d0235dbadd1d5c385a0a49d4cd8aea021e (diff)
parent: 90542cfe838376988982fb5c9062fc8dee0b7c87 (diff)
download: gdpr-obfuscator-f24955044c4c05e37aba4efb505ec63b44113912.tar.gz
download: gdpr-obfuscator-f24955044c4c05e37aba4efb505ec63b44113912.zip
Merge pull request #7 from ajschofield/feat/csv-writer
add create_byte_stream for obfuscation output
-rw-r--r-- cli.py 2
-rw-r--r-- obfuscator/csv_writer.py 26
-rw-r--r-- test/test_csv_writer.py 57
3 files changed, 85 insertions, 0 deletions
diff --git a/cli.py b/cli.py
index b1d4000..f2c8771 100644
--- a/cli.py
+++ b/cli.py
@@ -3,6 +3,7 @@ import json
from obfuscator.csv_reader import CSVReader
from obfuscator.obfuscate import obfuscate
from obfuscator.logger import get_logger
+from obfuscator.csv_writer import create_byte_stream
# Create the logger
logger = get_logger("CLI")
@@ -44,6 +45,7 @@ def main():
obfuscated_data = obfuscate(data, args.pii)
# For debug purposes, log the obfuscated data as JSON for readability
logger.debug("Obfuscated data (JSON): " + json.dumps(obfuscated_data, indent=4))
+ return create_byte_stream(obfuscated_data)
# If the script is run directly (as it should be), call the main function
diff --git a/obfuscator/csv_writer.py b/obfuscator/csv_writer.py
new file mode 100644
index 0000000..aa5ac3f
--- /dev/null
+++ b/obfuscator/csv_writer.py
@@ -0,0 +1,26 @@
+import csv
+import io
+from typing import List, Dict
+from obfuscator.logger import get_logger
+
+# Create the logger
+logger = get_logger("CSVWriter")
+
+
+def create_byte_stream(data: List[Dict[str, str]]) -> bytes:
+    """Serialise row dictionaries into UTF-8 encoded CSV bytes.
+
+    The header row is taken from the keys of the first row, in that
+    row's key order; every row is then written with ``csv.DictWriter``
+    using its default settings.
+
+    Args:
+        data: Rows to write, one dictionary per CSV record.
+
+    Returns:
+        The CSV document encoded as UTF-8, or ``b""`` when ``data`` is
+        empty.
+
+    Raises:
+        ValueError: If a row contains a key that the first row does not
+            (``csv.DictWriter`` default ``extrasaction="raise"``).
+    """
+    if not data:
+        logger.info("No valid data was provided to write")
+        return b""
+
+    # Column order follows the key order of the first row.
+    fieldnames = list(data[0])
+
+    with io.StringIO() as buffer:
+        dict_writer = csv.DictWriter(buffer, fieldnames=fieldnames)
+        dict_writer.writeheader()
+        dict_writer.writerows(data)
+        # Read the text out before the buffer is closed.
+        csv_text = buffer.getvalue()
+
+    logger.debug(f"CSV data: {csv_text}")
+
+    return csv_text.encode("utf-8")
diff --git a/test/test_csv_writer.py b/test/test_csv_writer.py
new file mode 100644
index 0000000..eceac28
--- /dev/null
+++ b/test/test_csv_writer.py
@@ -0,0 +1,57 @@
+import io
+import csv
+from obfuscator.csv_writer import create_byte_stream
+
+
+def csv_bytes_to_list(csv_bytes: bytes):
+    """Decode UTF-8 CSV bytes and parse them into a list of row dicts.
+
+    The first CSV record is used as the header by ``csv.DictReader``.
+    """
+    text_stream = io.StringIO(csv_bytes.decode("utf-8"))
+    return list(map(dict, csv.DictReader(text_stream)))
+
+
+def test_create_byte_stream_valid_data():
+    """Plain rows survive a write/read round trip unchanged."""
+    rows = [
+        {"student_id": "1234", "name": "Student 1", "course": "Course 1"},
+        {"student_id": "5678", "name": "Student 2", "course": "Course 2"},
+    ]
+    round_tripped = csv_bytes_to_list(create_byte_stream(rows))
+    assert round_tripped == rows
+
+
+def test_create_byte_stream_empty_data():
+    """An empty row list produces an empty byte string."""
+    assert create_byte_stream([]) == b""
+
+
+def test_create_byte_stream_handles_quoted_fields():
+    """Values containing double quotes and commas round-trip intact."""
+    rows = [
+        {"student_id": "1234", "name": 'Student "One"', "course": "Course, A"},
+        {"student_id": "5678", "name": 'Student "Two"', "course": "Course, B"},
+    ]
+    round_tripped = csv_bytes_to_list(create_byte_stream(rows))
+    assert round_tripped == rows
+
+
+def test_create_byte_stream_consistent_header_order():
+    """The header line lists columns in the first row's key order."""
+    rows = [
+        {"student_id": "1234", "name": "Alice", "course": "Math"},
+        {"student_id": "5678", "name": "Bob", "course": "Science"},
+    ]
+    decoded = create_byte_stream(rows).decode("utf-8")
+    # splitlines() drops the line terminator, leaving the bare header.
+    first_line = decoded.splitlines()[0]
+    assert first_line == ",".join(rows[0].keys())
+
+
+def test_create_byte_stream_special_characters():
+    """Embedded newlines and commas inside values round-trip intact."""
+    rows = [
+        {"student_id": "1234", "name": "Student 1", "course": "Line1\nLine2"},
+        {"student_id": "5678", "name": "Student 2", "course": "Value with, comma"},
+    ]
+    round_tripped = csv_bytes_to_list(create_byte_stream(rows))
+    assert round_tripped == rows
git.ajschof.me — hosted by ajschofield — powered by cgit