From 689e43b4ba5e88faccbef9b0f7f3e45a4d519744 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 11 Feb 2025 13:23:19 +0000 Subject: add initial tests for csv_reader --- test/test_csv_reader.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 test/test_csv_reader.py (limited to 'test/test_csv_reader.py') diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py new file mode 100644 index 0000000..f16a229 --- /dev/null +++ b/test/test_csv_reader.py @@ -0,0 +1,35 @@ +# csv_reader.py - tests +# Author: Alex Schofield + +from csv_reader import csv_reader + +def test_empty_csv_should_return_no_content(): + pass + +def test_csv_with_header_only_should_return_no_content(): + pass + +def test_csv_with_valid_data_should_return_obfuscated_content(): + pass + +def test_csv_with_quoted_fields_should_be_sanitised(): + pass + +def test_non_csv_file_should_return_no_content(): + pass + +def test_csv_file_with_embedded_newline_should_be_sanitised(): + pass + +def test_csv_file_with_embedded_comma_should_be_sanitised(): + pass + +def test_csv_file_with_embedded_quote_should_be_sanitised(): + pass + +def test_csv_file_with_null_values_should_be_transformed_to_empty_string(): + pass + +def test_csv_file_with_non_string_data_should_be_transformed_to_empty_string(): + pass + -- cgit v1.2.3 From 89396b2c69fb10260127c39ccf962b6ec6730049 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 11 Feb 2025 13:45:20 +0000 Subject: add basic test logic for csv_reader --- test/test_csv_reader.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'test/test_csv_reader.py') diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index f16a229..8189842 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -4,13 +4,29 @@ from csv_reader import csv_reader def test_empty_csv_should_return_no_content(): - pass + content = "" + result = csv_reader(content) + expected = [] + assert result == expected def test_csv_with_header_only_should_return_no_content(): - pass - -def test_csv_with_valid_data_should_return_obfuscated_content(): - pass + content = "student_id,name,course\n" + result = csv_reader(content) + expected = [] + assert result == expected + +def test_csv_with_valid_data(): + content = ( + "student_id,name,course\n" + "1234,Student 1,Course 1\n" + "5678,Student 2,Course 2\n" + ) + result = csv_reader(content) + expected = [ + {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, + {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, + ] + assert result == expected def test_csv_with_quoted_fields_should_be_sanitised(): pass -- cgit v1.2.3 From 3ec4c97f2f2fa07e63cc5781807f62883043490b Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 11 Feb 2025 13:48:39 +0000 Subject: skip tests that aren't functional yet --- test/test_csv_reader.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'test/test_csv_reader.py') diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 8189842..38c3957 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -2,6 +2,7 @@ # Author: Alex Schofield from csv_reader import csv_reader +import pytest def test_empty_csv_should_return_no_content(): content = "" @@ -28,24 +29,31 @@ def test_csv_with_valid_data(): ] assert result == expected +@pytest.mark.skip(reason="Not implemented yet") def test_csv_with_quoted_fields_should_be_sanitised(): pass +@pytest.mark.skip(reason="Not implemented yet") def test_non_csv_file_should_return_no_content(): pass +@pytest.mark.skip(reason="Not implemented yet") def test_csv_file_with_embedded_newline_should_be_sanitised(): pass +@pytest.mark.skip(reason="Not implemented yet") def test_csv_file_with_embedded_comma_should_be_sanitised(): pass +@pytest.mark.skip(reason="Not implemented yet") def test_csv_file_with_embedded_quote_should_be_sanitised(): pass +@pytest.mark.skip(reason="Not implemented yet") def test_csv_file_with_null_values_should_be_transformed_to_empty_string(): pass +@pytest.mark.skip(reason="Not implemented yet") def test_csv_file_with_non_string_data_should_be_transformed_to_empty_string(): pass -- cgit v1.2.3 From 4452e3afd4fb6c7f6c3f44c90be64908aa2c67fb Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 11 Feb 2025 14:57:19 +0000 Subject: add functionality for testing quoted fields in csv_reader --- test/test_csv_reader.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'test/test_csv_reader.py') diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 38c3957..7d13d5a 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -29,9 +29,18 @@ def test_csv_with_valid_data(): ] assert result == expected -@pytest.mark.skip(reason="Not implemented yet") def test_csv_with_quoted_fields_should_be_sanitised(): - pass + content = ( + 'student_id,name,course\n' + '1234,"Student 1","Course 1"\n' + '5678,"Student 2","Course 2"\n' + ) + result = csv_reader(content) + expected = [ + {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, + {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, + ] + assert result == expected @pytest.mark.skip(reason="Not implemented yet") def test_non_csv_file_should_return_no_content(): -- cgit v1.2.3 From 55fec4960b80bfd94fa094a862a6c84dd30ca874 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 11 Feb 2025 23:15:45 +0000 Subject: rename csv_reader.py to main.py --- src/csv_reader.py | 8 -------- src/main.py | 8 ++++++++ test/test_csv_reader.py | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) delete mode 100644 src/csv_reader.py create mode 100644 src/main.py (limited to 'test/test_csv_reader.py') diff --git a/src/csv_reader.py b/src/csv_reader.py deleted file mode 100644 index 23afc66..0000000 --- a/src/csv_reader.py +++ /dev/null @@ -1,8 +0,0 @@ -import csv -from io import StringIO -from typing import List, Dict - -def csv_reader(content: str) -> List[Dict[str, str]]: - f = StringIO(content) - reader = csv.DictReader(f) - return list(reader) diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..23afc66 --- /dev/null +++ b/src/main.py @@ -0,0 +1,8 @@ +import csv +from io import StringIO +from typing import List, Dict + +def csv_reader(content: str) -> List[Dict[str, str]]: + f = StringIO(content) + reader = csv.DictReader(f) + return list(reader) diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 7d13d5a..d96a2d1 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -1,7 +1,7 @@ # csv_reader.py - tests # Author: Alex Schofield -from csv_reader import csv_reader +from main import csv_reader import pytest def test_empty_csv_should_return_no_content(): -- cgit v1.2.3 From 720aa27906117682b4c6c1d7f8b1fd4d9b15bdf8 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 12 Feb 2025 01:30:46 +0000 Subject: reformat tests for csv_reader --- test/test_csv_reader.py | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) (limited to 'test/test_csv_reader.py') diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index d96a2d1..3fb2fa8 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -29,7 +29,7 @@ def test_csv_with_valid_data(): ] assert result == expected -def test_csv_with_quoted_fields_should_be_sanitised(): +def test_csv_with_quoted_fields_should_run_as_expected(): content = ( 'student_id,name,course\n' '1234,"Student 1","Course 1"\n' @@ -40,29 +40,4 @@ def test_csv_with_quoted_fields_should_be_sanitised(): {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, ] - assert result == expected - -@pytest.mark.skip(reason="Not implemented yet") -def test_non_csv_file_should_return_no_content(): - pass - -@pytest.mark.skip(reason="Not implemented yet") -def test_csv_file_with_embedded_newline_should_be_sanitised(): - pass - -@pytest.mark.skip(reason="Not implemented yet") -def test_csv_file_with_embedded_comma_should_be_sanitised(): - pass - -@pytest.mark.skip(reason="Not implemented yet") -def test_csv_file_with_embedded_quote_should_be_sanitised(): - pass - -@pytest.mark.skip(reason="Not implemented yet") -def test_csv_file_with_null_values_should_be_transformed_to_empty_string(): - pass - -@pytest.mark.skip(reason="Not implemented yet") -def test_csv_file_with_non_string_data_should_be_transformed_to_empty_string(): - pass - + assert result == expected \ No newline at end of file -- cgit v1.2.3 From ecadaaf8d0d0f22b3ff24fa5fd99845da262d2af Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 12 Feb 2025 01:33:44 +0000 Subject: add test for non-csv file output for csv_reader --- test/test_csv_reader.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'test/test_csv_reader.py') diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 3fb2fa8..d245d7c 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -40,4 +40,10 @@ def test_csv_with_quoted_fields_should_run_as_expected(): {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, ] + assert result == expected + +def test_non_csv_file_should_return_no_content(): + content = "" + result = csv_reader(content) + expected = [] assert result == expected \ No newline at end of file -- cgit v1.2.3 From 0f29d52ba932eeed2ae5826c31cbe9a379fd4579 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Fri, 14 Feb 2025 13:47:05 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in 6182930 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/gdpr-obfuscator/pull/1 --- cli.py | 2 ++ obfuscator/csv_reader.py | 4 +--- obfuscator/logger.py | 7 +++++-- test/test_csv_reader.py | 15 ++++++++++----- 4 files changed, 18 insertions(+), 10 deletions(-) (limited to 'test/test_csv_reader.py') diff --git a/cli.py b/cli.py index 23c9057..652830e 100644 --- a/cli.py +++ b/cli.py @@ -4,6 +4,7 @@ from obfuscator.logger import get_logger logger = get_logger("CLI") + def main(): parser = argparse.ArgumentParser(description="gdpr-obfuscator") # Require user to either choose a local file or an S3 object @@ -20,5 +21,6 @@ def main(): else: logger.debug("User chose to read CSV from S3") + if __name__ == "__main__": main() diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 42e8f8a..5bc91dc 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -4,6 +4,7 @@ from logger import get_logger logger = get_logger("CSVReader") + class CSVReader: def __init__(self, path: str): self.path = path @@ -27,6 +28,3 @@ class CSVReader: def read_s3(self) -> List[Dict[str, str]]: return [] - - - diff --git a/obfuscator/logger.py b/obfuscator/logger.py index 52c4f5e..61a75ec 100644 --- a/obfuscator/logger.py +++ b/obfuscator/logger.py @@ -1,6 +1,7 @@ import logging import os + def get_logger(name: str) -> logging.Logger: logger = logging.getLogger(name) @@ -13,9 +14,11 @@ def get_logger(name: str) -> logging.Logger: logger.setLevel(log_level) handler = logging.StreamHandler() - format = logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s") + format = logging.Formatter( + "%(asctime)s - %(levelname)s - %(name)s - %(message)s" + ) handler.setFormatter(format) logger.addHandler(handler) - return logger \ No newline at end of file + return logger diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index d245d7c..ac43b04 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -4,34 +4,38 @@ from main import csv_reader import pytest + def test_empty_csv_should_return_no_content(): content = "" result = csv_reader(content) expected = [] assert result == expected + def test_csv_with_header_only_should_return_no_content(): content = "student_id,name,course\n" result = csv_reader(content) expected = [] assert result == expected + def test_csv_with_valid_data(): content = ( "student_id,name,course\n" "1234,Student 1,Course 1\n" "5678,Student 2,Course 2\n" - ) + ) result = csv_reader(content) expected = [ {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, - ] - assert result == expected + ] + assert result == expected + def test_csv_with_quoted_fields_should_run_as_expected(): content = ( - 'student_id,name,course\n' + "student_id,name,course\n" '1234,"Student 1","Course 1"\n' '5678,"Student 2","Course 2"\n' ) @@ -42,8 +46,9 @@ def test_csv_with_quoted_fields_should_run_as_expected(): ] assert result == expected + def test_non_csv_file_should_return_no_content(): content = "" result = csv_reader(content) expected = [] - assert result == expected \ No newline at end of file + assert result == expected -- cgit v1.2.3 From ad1d342894d31838c3b1b98eb66c5498c101e8ac Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 14:01:05 +0000 Subject: create CSVReader instance for tests - broken --- test/test_csv_reader.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'test/test_csv_reader.py') diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index d245d7c..0098e0f 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -1,9 +1,13 @@ # csv_reader.py - tests # Author: Alex Schofield -from main import csv_reader +from obfuscator.csv_reader import CSVReader import pytest +reader = CSVReader() + +### TODO : TESTS ARE BROKEN, FIX THEM IN NEXT BRANCH ### + def test_empty_csv_should_return_no_content(): content = "" result = csv_reader(content) -- cgit v1.2.3 From c8df3c6d06dcb2ba6720a17bf3b6db67693c41dd Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Fri, 14 Feb 2025 14:04:18 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in 5082d02 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/gdpr-obfuscator/pull/1 --- test/test_csv_reader.py | 1 + 1 file changed, 1 insertion(+) (limited to 'test/test_csv_reader.py') diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index fb5996d..03c6e4b 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -8,6 +8,7 @@ reader = CSVReader() ### TODO : TESTS ARE BROKEN, FIX THEM IN NEXT BRANCH ### + def test_empty_csv_should_return_no_content(): content = "" result = csv_reader(content) -- cgit v1.2.3 From 92a7f1ae67e825eefb9488637added14cd5f7d53 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 17 Feb 2025 01:07:40 +0000 Subject: fix tests for csv_reader by using new read_string method --- test/test_csv_reader.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'test/test_csv_reader.py') diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 03c6e4b..48a808c 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -6,19 +6,16 @@ import pytest reader = CSVReader() -### TODO : TESTS ARE BROKEN, FIX THEM IN NEXT BRANCH ### - - def test_empty_csv_should_return_no_content(): content = "" - result = csv_reader(content) + result = reader.read_string(content) expected = [] assert result == expected def test_csv_with_header_only_should_return_no_content(): content = "student_id,name,course\n" - result = csv_reader(content) + result = reader.read_string(content) expected = [] assert result == expected @@ -29,7 +26,7 @@ def test_csv_with_valid_data(): "1234,Student 1,Course 1\n" "5678,Student 2,Course 2\n" ) - result = csv_reader(content) + result = reader.read_string(content) expected = [ {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, @@ -43,7 +40,7 @@ def test_csv_with_quoted_fields_should_run_as_expected(): '1234,"Student 1","Course 1"\n' '5678,"Student 2","Course 2"\n' ) - result = csv_reader(content) + result = reader.read_string(content) expected = [ {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, @@ -53,6 +50,6 @@ def test_csv_with_quoted_fields_should_run_as_expected(): def test_non_csv_file_should_return_no_content(): content = "" - result = csv_reader(content) + result = reader.read_string(content) expected = [] assert result == expected -- cgit v1.2.3 From cf1376862fb2f58c2e837338ed9c765439ffa1b9 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Mon, 17 Feb 2025 01:17:37 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in f612f71 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/gdpr-obfuscator/pull/1 --- obfuscator/csv_reader.py | 5 +++-- test/test_csv_reader.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'test/test_csv_reader.py') diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 3b4496b..b9dccdb 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -5,6 +5,7 @@ from obfuscator.logger import get_logger logger = get_logger("CSVReader") + class CSVReader: @staticmethod def read_local(path) -> List[Dict[str, str]]: @@ -18,7 +19,7 @@ class CSVReader: logger.error(f"File not found: {path}") except Exception as e: logger.error(f"Error reading file: {e}") - + @staticmethod def read_s3(path) -> List[Dict[str, str]]: return [] @@ -27,7 +28,7 @@ class CSVReader: def read_string(content: str) -> List[Dict[str, str]]: if not content.strip(): return [] - + f = io.StringIO(content) reader = csv.DictReader(f) return [dict(row) for row in reader] diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 48a808c..e62c093 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -6,6 +6,7 @@ import pytest reader = CSVReader() + def test_empty_csv_should_return_no_content(): content = "" result = reader.read_string(content) -- cgit v1.2.3