diff options
| -rw-r--r-- | cli.py | 2 | ||||
| -rw-r--r-- | obfuscator/csv_writer.py | 26 | ||||
| -rw-r--r-- | test/test_csv_writer.py | 57 |
3 files changed, 85 insertions, 0 deletions
@@ -3,6 +3,7 @@ import json
 from obfuscator.csv_reader import CSVReader
 from obfuscator.obfuscate import obfuscate
 from obfuscator.logger import get_logger
+from obfuscator.csv_writer import create_byte_stream
 
 # Create the logger
 logger = get_logger("CLI")
@@ -44,6 +45,7 @@ def main():
     obfuscated_data = obfuscate(data, args.pii)
     # For debug purposes, log the obfuscated data as JSON for readability
     logger.debug("Obfuscated data (JSON): " + json.dumps(obfuscated_data, indent=4))
+    return create_byte_stream(obfuscated_data)
 
 
 # If the script is run directly (as it should be), call the main function
diff --git a/obfuscator/csv_writer.py b/obfuscator/csv_writer.py
new file mode 100644
index 0000000..aa5ac3f
--- /dev/null
+++ b/obfuscator/csv_writer.py
@@ -0,0 +1,26 @@
+import csv
+import io
+from typing import List, Dict
+from obfuscator.logger import get_logger
+
+# Create the logger
+logger = get_logger("CSVWriter")
+
+
+def create_byte_stream(data: List[Dict[str, str]]) -> bytes:
+    if not data:
+        logger.info("No valid data was provided to write")
+        return b""
+
+    output = io.StringIO()
+
+    headers = list(data[0].keys())
+
+    writer = csv.DictWriter(output, fieldnames=headers)
+    writer.writeheader()
+    writer.writerows(data)
+
+    csv_string = output.getvalue()
+    logger.debug(f"CSV data: {csv_string}")
+
+    return csv_string.encode("utf-8")
diff --git a/test/test_csv_writer.py b/test/test_csv_writer.py
new file mode 100644
index 0000000..eceac28
--- /dev/null
+++ b/test/test_csv_writer.py
@@ -0,0 +1,57 @@
+import io
+import csv
+from obfuscator.csv_writer import create_byte_stream
+
+
+def csv_bytes_to_list(csv_bytes: bytes):
+    csv_string = csv_bytes.decode("utf-8")
+    f = io.StringIO(csv_string)
+    reader = csv.DictReader(f)
+    return [dict(row) for row in reader]
+
+
+def test_create_byte_stream_valid_data():
+    data = [
+        {"student_id": "1234", "name": "Student 1", "course": "Course 1"},
+        {"student_id": "5678", "name": "Student 2", "course": "Course 2"},
+    ]
+    csv_bytes = create_byte_stream(data)
+    result = csv_bytes_to_list(csv_bytes)
+    assert result == data
+
+
+def test_create_byte_stream_empty_data():
+    csv_bytes = create_byte_stream([])
+    assert csv_bytes == b""
+
+
+def test_create_byte_stream_handles_quoted_fields():
+    data = [
+        {"student_id": "1234", "name": 'Student "One"', "course": "Course, A"},
+        {"student_id": "5678", "name": 'Student "Two"', "course": "Course, B"},
+    ]
+    csv_bytes = create_byte_stream(data)
+    result = csv_bytes_to_list(csv_bytes)
+    assert result == data
+
+
+def test_create_byte_stream_consistent_header_order():
+    data = [
+        {"student_id": "1234", "name": "Alice", "course": "Math"},
+        {"student_id": "5678", "name": "Bob", "course": "Science"},
+    ]
+    csv_bytes = create_byte_stream(data)
+    csv_string = csv_bytes.decode("utf-8")
+    header_line = csv_string.splitlines()[0]
+    expected_header = ",".join(data[0].keys())
+    assert header_line == expected_header
+
+
+def test_create_byte_stream_special_characters():
+    data = [
+        {"student_id": "1234", "name": "Student 1", "course": "Line1\nLine2"},
+        {"student_id": "5678", "name": "Student 2", "course": "Value with, comma"},
+    ]
+    csv_bytes = create_byte_stream(data)
+    result = csv_bytes_to_list(csv_bytes)
+    assert result == data
