From 689e43b4ba5e88faccbef9b0f7f3e45a4d519744 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 11 Feb 2025 13:23:19 +0000 Subject: add initial tests for csv_reader --- Pipfile | 12 ++++++++++++ pytest.ini | 2 ++ src/csv_reader.py | 2 ++ test/test_csv_reader.py | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 51 insertions(+) create mode 100644 Pipfile create mode 100644 pytest.ini create mode 100644 src/csv_reader.py create mode 100644 test/test_csv_reader.py diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..e915499 --- /dev/null +++ b/Pipfile @@ -0,0 +1,12 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] + +[dev-packages] +pytest = "*" + +[requires] +python_version = "3.13" diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..ad5c7cc --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +pythonpath = . src diff --git a/src/csv_reader.py b/src/csv_reader.py new file mode 100644 index 0000000..2556db6 --- /dev/null +++ b/src/csv_reader.py @@ -0,0 +1,2 @@ +def csv_reader(): + pass diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py new file mode 100644 index 0000000..f16a229 --- /dev/null +++ b/test/test_csv_reader.py @@ -0,0 +1,35 @@ +# csv_reader.py - tests +# Author: Alex Schofield + +from csv_reader import csv_reader + +def test_empty_csv_should_return_no_content(): + pass + +def test_csv_with_header_only_should_return_no_content(): + pass + +def test_csv_with_valid_data_should_return_obfuscated_content(): + pass + +def test_csv_with_quoted_fields_should_be_sanitised(): + pass + +def test_non_csv_file_should_return_no_content(): + pass + +def test_csv_file_with_embedded_newline_should_be_sanitised(): + pass + +def test_csv_file_with_embedded_comma_should_be_sanitised(): + pass + +def test_csv_file_with_embedded_quote_should_be_sanitised(): + pass + +def test_csv_file_with_null_values_should_be_transformed_to_empty_string(): + pass + +def test_csv_file_with_non_string_data_should_be_transformed_to_empty_string(): + pass + -- cgit v1.2.3 From 89396b2c69fb10260127c39ccf962b6ec6730049 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 11 Feb 2025 13:45:20 +0000 Subject: add basic test logic for csv_reader --- test/test_csv_reader.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index f16a229..8189842 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -4,13 +4,29 @@ from csv_reader import csv_reader def test_empty_csv_should_return_no_content(): - pass + content = "" + result = csv_reader(content) + expected = [] + assert result == expected def test_csv_with_header_only_should_return_no_content(): - pass - -def test_csv_with_valid_data_should_return_obfuscated_content(): - pass + content = "student_id,name,course\n" + result = csv_reader(content) + expected = [] + assert result == expected + +def test_csv_with_valid_data(): + content = ( + "student_id,name,course\n" + "1234,Student 1,Course 1\n" + "5678,Student 2,Course 2\n" + ) + result = csv_reader(content) + expected = [ + {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, + {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, + ] + assert result == expected def test_csv_with_quoted_fields_should_be_sanitised(): pass -- cgit v1.2.3 From 904d1951567fc496ff13f6fbaaac4aadedd391f8 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 11 Feb 2025 13:46:28 +0000 Subject: add basic functionality for csv_reader --- src/csv_reader.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/csv_reader.py b/src/csv_reader.py index 2556db6..81f1485 100644 --- a/src/csv_reader.py +++ b/src/csv_reader.py @@ -1,2 +1,7 @@ -def csv_reader(): - pass +import csv +from io import StringIO + +def csv_reader(file): + f = StringIO(file) + reader = csv.DictReader(f) + return list(reader) -- cgit v1.2.3 From 3ec4c97f2f2fa07e63cc5781807f62883043490b Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 11 Feb 2025 13:48:39 +0000 Subject: skip tests that aren't functional yet --- test/test_csv_reader.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 8189842..38c3957 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -2,6 +2,7 @@ # Author: Alex Schofield from csv_reader import csv_reader +import pytest def test_empty_csv_should_return_no_content(): content = "" @@ -28,24 +29,31 @@ def test_csv_with_valid_data(): ] assert result == expected +@pytest.mark.skip(reason="Not implemented yet") def test_csv_with_quoted_fields_should_be_sanitised(): pass +@pytest.mark.skip(reason="Not implemented yet") def test_non_csv_file_should_return_no_content(): pass +@pytest.mark.skip(reason="Not implemented yet") def test_csv_file_with_embedded_newline_should_be_sanitised(): pass +@pytest.mark.skip(reason="Not implemented yet") def test_csv_file_with_embedded_comma_should_be_sanitised(): pass +@pytest.mark.skip(reason="Not implemented yet") def test_csv_file_with_embedded_quote_should_be_sanitised(): pass +@pytest.mark.skip(reason="Not implemented yet") def test_csv_file_with_null_values_should_be_transformed_to_empty_string(): pass +@pytest.mark.skip(reason="Not implemented yet") def test_csv_file_with_non_string_data_should_be_transformed_to_empty_string(): pass -- cgit v1.2.3 From 4452e3afd4fb6c7f6c3f44c90be64908aa2c67fb Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 11 Feb 2025 14:57:19 +0000 Subject: add functionality for testing quoted fields in csv_reader --- test/test_csv_reader.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 38c3957..7d13d5a 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -29,9 +29,18 @@ def test_csv_with_valid_data(): ] assert result == expected -@pytest.mark.skip(reason="Not implemented yet") def test_csv_with_quoted_fields_should_be_sanitised(): - pass + content = ( + 'student_id,name,course\n' + '1234,"Student 1","Course 1"\n' + '5678,"Student 2","Course 2"\n' + ) + result = csv_reader(content) + expected = [ + {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, + {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, + ] + assert result == expected @pytest.mark.skip(reason="Not implemented yet") def test_non_csv_file_should_return_no_content(): -- cgit v1.2.3 From ac27aaea71cb9a035446f50d3dc0148ec144ec88 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 11 Feb 2025 23:14:30 +0000 Subject: add type checking to csv_reader function --- src/csv_reader.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/csv_reader.py b/src/csv_reader.py index 81f1485..23afc66 100644 --- a/src/csv_reader.py +++ b/src/csv_reader.py @@ -1,7 +1,8 @@ import csv from io import StringIO +from typing import List, Dict -def csv_reader(file): - f = StringIO(file) +def csv_reader(content: str) -> List[Dict[str, str]]: + f = StringIO(content) reader = csv.DictReader(f) return list(reader) -- cgit v1.2.3 From 55fec4960b80bfd94fa094a862a6c84dd30ca874 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 11 Feb 2025 23:15:45 +0000 Subject: rename csv_reader.py to main.py --- src/csv_reader.py | 8 -------- src/main.py | 8 ++++++++ test/test_csv_reader.py | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) delete mode 100644 src/csv_reader.py create mode 100644 src/main.py diff --git a/src/csv_reader.py b/src/csv_reader.py deleted file mode 100644 index 23afc66..0000000 --- a/src/csv_reader.py +++ /dev/null @@ -1,8 +0,0 @@ -import csv -from io import StringIO -from typing import List, Dict - -def csv_reader(content: str) -> List[Dict[str, str]]: - f = StringIO(content) - reader = csv.DictReader(f) - return list(reader) diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..23afc66 --- /dev/null +++ b/src/main.py @@ -0,0 +1,8 @@ +import csv +from io import StringIO +from typing import List, Dict + +def csv_reader(content: str) -> List[Dict[str, str]]: + f = StringIO(content) + reader = csv.DictReader(f) + return list(reader) diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 7d13d5a..d96a2d1 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -1,7 +1,7 @@ # csv_reader.py - tests # Author: Alex Schofield -from csv_reader import csv_reader +from main import csv_reader import pytest def test_empty_csv_should_return_no_content(): -- cgit v1.2.3 From 1e13c14d44bd318332747e0e12955e4e71aada20 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 12 Feb 2025 00:18:57 +0000 Subject: remove Pipfile --- Pipfile | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 Pipfile diff --git a/Pipfile b/Pipfile deleted file mode 100644 index e915499..0000000 --- a/Pipfile +++ /dev/null @@ -1,12 +0,0 @@ -[[source]] -url = "https://pypi.org/simple" -verify_ssl = true -name = "pypi" - -[packages] - -[dev-packages] -pytest = "*" - -[requires] -python_version = "3.13" -- cgit v1.2.3 From fcca49eed0e3734e5b33f8ef16128e3a37f7f772 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 12 Feb 2025 00:38:20 +0000 Subject: ignore poetry.lock in .gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ecda235..78eaef5 100644 --- a/.gitignore +++ b/.gitignore @@ -105,7 +105,7 @@ Pipfile.lock # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock +poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -- cgit v1.2.3 From 6621bdbc405ba71a208a1967d7d7a068a5c203ab Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 12 Feb 2025 00:46:48 +0000 Subject: add pyproject.toml for poetry --- pyproject.toml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a629cbe --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,23 @@ +[project] +name = "gdpr-obfuscator" +version = "0.1.0" +description = "A Python library designed to detect and remove Personally Identifiable Information (PII) from CSV files stored in an AWS S3 bucket." +authors = [ + {name = "Alex Schofield",email = "git@ajschof.me"} +] +readme = "README.md" +requires-python = ">=3.13" +dependencies = [ +] + +[tool.poetry] +packages = [ + { include = "gdpr-obfuscator", from = "src" } +] + +[tool.poetry.group.dev.dependencies] +pytest = "8.3.4" + +[build-system] +requires = ["poetry-core>=2.0.0,<3.0.0"] +build-backend = "poetry.core.masonry.api" -- cgit v1.2.3 From 720aa27906117682b4c6c1d7f8b1fd4d9b15bdf8 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 12 Feb 2025 01:30:46 +0000 Subject: reformat tests for csv_reader --- test/test_csv_reader.py | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index d96a2d1..3fb2fa8 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -29,7 +29,7 @@ def test_csv_with_valid_data(): ] assert result == expected -def test_csv_with_quoted_fields_should_be_sanitised(): +def test_csv_with_quoted_fields_should_run_as_expected(): content = ( 'student_id,name,course\n' '1234,"Student 1","Course 1"\n' @@ -40,29 +40,4 @@ def test_csv_with_quoted_fields_should_be_sanitised(): {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, ] - assert result == expected - -@pytest.mark.skip(reason="Not implemented yet") -def test_non_csv_file_should_return_no_content(): - pass - -@pytest.mark.skip(reason="Not implemented yet") -def test_csv_file_with_embedded_newline_should_be_sanitised(): - pass - -@pytest.mark.skip(reason="Not implemented yet") -def test_csv_file_with_embedded_comma_should_be_sanitised(): - pass - -@pytest.mark.skip(reason="Not implemented yet") -def test_csv_file_with_embedded_quote_should_be_sanitised(): - pass - -@pytest.mark.skip(reason="Not implemented yet") -def test_csv_file_with_null_values_should_be_transformed_to_empty_string(): - pass - -@pytest.mark.skip(reason="Not implemented yet") -def test_csv_file_with_non_string_data_should_be_transformed_to_empty_string(): - pass - + assert result == expected \ No newline at end of file -- cgit v1.2.3 From ecadaaf8d0d0f22b3ff24fa5fd99845da262d2af Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 12 Feb 2025 01:33:44 +0000 Subject: add test for non-csv file output for csv_reader --- test/test_csv_reader.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 3fb2fa8..d245d7c 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -40,4 +40,10 @@ def test_csv_with_quoted_fields_should_run_as_expected(): {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, ] + assert result == expected + +def test_non_csv_file_should_return_no_content(): + content = "" + result = csv_reader(content) + expected = [] assert result == expected \ No newline at end of file -- cgit v1.2.3 From a5696fe5939119d091117b0d81976ac1dd708692 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 12 Feb 2025 01:49:24 +0000 Subject: add logging & move main.py to project root --- main.py | 20 ++++++++++++++++++++ src/main.py | 8 -------- 2 files changed, 20 insertions(+), 8 deletions(-) create mode 100644 main.py delete mode 100644 src/main.py diff --git a/main.py b/main.py new file mode 100644 index 0000000..8ee40e9 --- /dev/null +++ b/main.py @@ -0,0 +1,20 @@ +import csv +from io import StringIO +from typing import List, Dict +import logging + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +if not logger.handlers: + handler = logging.StreamHandler() + formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') + handler.setFormatter(formatter) + logger.addHandler(handler) + +def csv_reader(content: str) -> List[Dict[str, str]]: + f = StringIO(content) + reader = csv.DictReader(f) + logger.info("Finished reading CSV!") + return list(reader) + + diff --git a/src/main.py b/src/main.py deleted file mode 100644 index 23afc66..0000000 --- a/src/main.py +++ /dev/null @@ -1,8 +0,0 @@ -import csv -from io import StringIO -from typing import List, Dict - -def csv_reader(content: str) -> List[Dict[str, str]]: - f = StringIO(content) - reader = csv.DictReader(f) - return list(reader) -- cgit v1.2.3 From 7dab7013b724bfb86fa666353b4fe887f7de0b71 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 12 Feb 2025 01:52:57 +0000 Subject: restore original project layout --- main.py | 20 -------------------- src/csv_reader.py | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 20 deletions(-) delete mode 100644 main.py create mode 100644 src/csv_reader.py diff --git a/main.py b/main.py deleted file mode 100644 index 8ee40e9..0000000 --- a/main.py +++ /dev/null @@ -1,20 +0,0 @@ -import csv -from io import StringIO -from typing import List, Dict -import logging - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -if not logger.handlers: - handler = logging.StreamHandler() - formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') - handler.setFormatter(formatter) - logger.addHandler(handler) - -def csv_reader(content: str) -> List[Dict[str, str]]: - f = StringIO(content) - reader = csv.DictReader(f) - logger.info("Finished reading CSV!") - return list(reader) - - diff --git a/src/csv_reader.py b/src/csv_reader.py new file mode 100644 index 0000000..8ee40e9 --- /dev/null +++ b/src/csv_reader.py @@ -0,0 +1,20 @@ +import csv +from io import StringIO +from typing import List, Dict +import logging + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +if not logger.handlers: + handler = logging.StreamHandler() + formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') + handler.setFormatter(formatter) + logger.addHandler(handler) + +def csv_reader(content: str) -> List[Dict[str, str]]: + f = StringIO(content) + reader = csv.DictReader(f) + logger.info("Finished reading CSV!") + return list(reader) + + -- cgit v1.2.3 From 7de853bf277c8915213ce19ffa676df7a15865e7 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 12 Feb 2025 01:53:53 +0000 Subject: expose library functions through __init__.py --- src/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/__init__.py diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 -- cgit v1.2.3 From 5e125e6c561c81526c2f4c4c7895fe720ba64d90 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 12 Feb 2025 01:56:14 +0000 Subject: re-organise project structure & update pyproject.toml --- obfuscator/__init__.py | 0 obfuscator/csv_reader.py | 20 ++++++++++++++++++++ pyproject.toml | 2 +- src/__init__.py | 0 src/csv_reader.py | 20 -------------------- 5 files changed, 21 insertions(+), 21 deletions(-) create mode 100644 obfuscator/__init__.py create mode 100644 obfuscator/csv_reader.py delete mode 100644 src/__init__.py delete mode 100644 src/csv_reader.py diff --git a/obfuscator/__init__.py b/obfuscator/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py new file mode 100644 index 0000000..8ee40e9 --- /dev/null +++ b/obfuscator/csv_reader.py @@ -0,0 +1,20 @@ +import csv +from io import StringIO +from typing import List, Dict +import logging + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +if not logger.handlers: + handler = logging.StreamHandler() + formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') + handler.setFormatter(formatter) + logger.addHandler(handler) + +def csv_reader(content: str) -> List[Dict[str, str]]: + f = StringIO(content) + reader = csv.DictReader(f) + logger.info("Finished reading CSV!") + return list(reader) + + diff --git a/pyproject.toml b/pyproject.toml index a629cbe..4aa227a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ dependencies = [ [tool.poetry] packages = [ - { include = "gdpr-obfuscator", from = "src" } + { include = "gdpr-obfuscator", from = "obfuscator" } ] [tool.poetry.group.dev.dependencies] diff --git a/src/__init__.py b/src/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/csv_reader.py b/src/csv_reader.py deleted file mode 100644 index 8ee40e9..0000000 --- a/src/csv_reader.py +++ /dev/null @@ -1,20 +0,0 @@ -import csv -from io import StringIO -from typing import List, Dict -import logging - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -if not logger.handlers: - handler = logging.StreamHandler() - formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') - handler.setFormatter(formatter) - logger.addHandler(handler) - -def csv_reader(content: str) -> List[Dict[str, str]]: - f = StringIO(content) - reader = csv.DictReader(f) - logger.info("Finished reading CSV!") - return list(reader) - - -- cgit v1.2.3 From ff0ece8fb3f0cf247af441fe67f1ba16e9db5071 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 12 Feb 2025 02:00:04 +0000 Subject: add initial cli script for obfuscator --- cli.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 cli.py diff --git a/cli.py b/cli.py new file mode 100644 index 0000000..45fc2f1 --- /dev/null +++ b/cli.py @@ -0,0 +1,11 @@ +import argparse +from obfuscator import csv_reader + +def main(): + parser = argparse.ArgumentParser(description="gdpr-obfuscator") + parser.add_argument("--local", help="Path to local CSV file") + parser.add_argument("--s3", help="Path to S3 object for CSV file") + args = parser.parse_args() + +if __name__ == "__main__": + main() -- cgit v1.2.3 From 6aa8722c071bcdf87ab22bb23d561ff65ca251f6 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 12:12:44 +0000 Subject: rename main csv reader function in cli.py & csv_reader.py --- cli.py | 2 +- obfuscator/csv_reader.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cli.py b/cli.py index 45fc2f1..1911f5c 100644 --- a/cli.py +++ b/cli.py @@ -1,5 +1,5 @@ import argparse -from obfuscator import csv_reader +from obfuscator.csv_reader import read def main(): parser = argparse.ArgumentParser(description="gdpr-obfuscator") diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 8ee40e9..8a9be9b 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -11,7 +11,7 @@ if not logger.handlers: handler.setFormatter(formatter) logger.addHandler(handler) -def csv_reader(content: str) -> List[Dict[str, str]]: +def read(content: str) -> List[Dict[str, str]]: f = StringIO(content) reader = csv.DictReader(f) logger.info("Finished reading CSV!") -- cgit v1.2.3 From a16fd48f1e87e9b101250aba532ec81de813e260 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 12:24:40 +0000 Subject: require only --local or --s3 to be chosen by user in cli.py --- cli.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/cli.py b/cli.py index 1911f5c..81114d2 100644 --- a/cli.py +++ b/cli.py @@ -3,9 +3,16 @@ from obfuscator.csv_reader import read def main(): parser = argparse.ArgumentParser(description="gdpr-obfuscator") - parser.add_argument("--local", help="Path to local CSV file") - parser.add_argument("--s3", help="Path to S3 object for CSV file") + # Require user to either choose a local file or an S3 object + loc = parser.add_mutually_exclusive_group(required=True) + loc.add_argument("--local") + loc.add_argument("--s3") args = parser.parse_args() + if args.local and not args.s3: + print(read(args.local)) + else: + pass + if __name__ == "__main__": main() -- cgit v1.2.3 From 6ad17315d1d6a174b9ba7c597947e211ee87f40f Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 12:25:41 +0000 Subject: add separate functions for reading locally and from s3 bucket --- cli.py | 2 +- obfuscator/csv_reader.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cli.py b/cli.py index 81114d2..0bbaf89 100644 --- a/cli.py +++ b/cli.py @@ -1,5 +1,5 @@ import argparse -from obfuscator.csv_reader import read +from obfuscator.csv_reader import read_local, read_s3 def main(): parser = argparse.ArgumentParser(description="gdpr-obfuscator") diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 8a9be9b..4ae8a06 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -11,10 +11,13 @@ if not logger.handlers: handler.setFormatter(formatter) logger.addHandler(handler) -def read(content: str) -> List[Dict[str, str]]: +def read_local(content: str) -> List[Dict[str, str]]: f = StringIO(content) reader = csv.DictReader(f) logger.info("Finished reading CSV!") return list(reader) +def read_s3(): + pass + -- cgit v1.2.3 From 9bc5d21ae7376792c6a4813e1f1ef16bfb42ec37 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 12:32:27 +0000 Subject: encapsulate csv reading logic into class & methods --- obfuscator/csv_reader.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 4ae8a06..b365d45 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -1,5 +1,4 @@ import csv -from io import StringIO from typing import List, Dict import logging @@ -11,13 +10,30 @@ if not logger.handlers: handler.setFormatter(formatter) logger.addHandler(handler) -def read_local(content: str) -> List[Dict[str, str]]: - f = StringIO(content) - reader = csv.DictReader(f) - logger.info("Finished reading CSV!") - return list(reader) -def read_s3(): - pass +class CSVReader: + def __init__(self, path: str): + self.path = path + + def read_local(self) -> List[Dict[str, str]]: + logger.debug(f"Reading local CSV from: {self.path}") + data = [] + + try: + with open(self.path, mode="r", encoding="utf-8") as file: + reader = csv.DictReader(file) + for row in reader: + data.append(dict(row)) + except FileNotFoundError: + logger.error(f"File not found: {self.path}") + except Exception as e: + logger.error(f"Error reading file: {e}") + + logger.debug(f"Total rows read: {len(data)}") + return data + + def read_s3(self) -> List[Dict[str, str]]: + return [] + -- cgit v1.2.3 From 02478f2e48302441f86f3eeaba80119d6bc7ccf1 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 12:59:30 +0000 Subject: update cli.py to properly read from local csv files --- cli.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cli.py b/cli.py index 0bbaf89..bb12421 100644 --- a/cli.py +++ b/cli.py @@ -1,5 +1,5 @@ import argparse -from obfuscator.csv_reader import read_local, read_s3 +from obfuscator.csv_reader import CSVReader def main(): parser = argparse.ArgumentParser(description="gdpr-obfuscator") @@ -10,7 +10,9 @@ def main(): args = parser.parse_args() if args.local and not args.s3: - print(read(args.local)) + reader = CSVReader(args.local) + data = reader.read_local() + print(data) else: pass -- cgit v1.2.3 From d467ac7f4c08e0cd92a5b31ed21f5ff52309b710 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 13:40:21 +0000 Subject: update project dependencies: add tabulate --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 4aa227a..a9cd511 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ authors = [ readme = "README.md" requires-python = ">=3.13" dependencies = [ + "tabulate (>=0.9.0,<0.10.0)" ] [tool.poetry] -- cgit v1.2.3 From 6182930c3ea53932c6153dd101264cb90c90f979 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 13:44:57 +0000 Subject: add universal logging for debugging --- cli.py | 6 +++++- obfuscator/csv_reader.py | 11 ++--------- obfuscator/logger.py | 21 +++++++++++++++++++++ 3 files changed, 28 insertions(+), 10 deletions(-) create mode 100644 obfuscator/logger.py diff --git a/cli.py b/cli.py index bb12421..23c9057 100644 --- a/cli.py +++ b/cli.py @@ -1,5 +1,8 @@ import argparse from obfuscator.csv_reader import CSVReader +from obfuscator.logger import get_logger + +logger = get_logger("CLI") def main(): parser = argparse.ArgumentParser(description="gdpr-obfuscator") @@ -10,11 +13,12 @@ def main(): args = parser.parse_args() if args.local and not args.s3: + logger.debug("User chose to read CSV from local path") reader = CSVReader(args.local) data = reader.read_local() print(data) else: - pass + logger.debug("User chose to read CSV from S3") if __name__ == "__main__": main() diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index b365d45..42e8f8a 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -1,15 +1,8 @@ import csv from typing import List, Dict -import logging - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -if not logger.handlers: - handler = logging.StreamHandler() - formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') - handler.setFormatter(formatter) - logger.addHandler(handler) +from logger import get_logger +logger = get_logger("CSVReader") class CSVReader: def __init__(self, path: str): diff --git a/obfuscator/logger.py b/obfuscator/logger.py new file mode 100644 index 0000000..52c4f5e --- /dev/null +++ b/obfuscator/logger.py @@ -0,0 +1,21 @@ +import logging +import os + +def get_logger(name: str) -> logging.Logger: + logger = logging.getLogger(name) + + if not logger.hasHandlers(): + if os.getenv("DEBUG", "FALSE").upper() == "TRUE": + log_level = logging.DEBUG + else: + log_level = logging.INFO + + logger.setLevel(log_level) + + handler = logging.StreamHandler() + format = logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s") + handler.setFormatter(format) + + logger.addHandler(handler) + + return logger \ No newline at end of file -- cgit v1.2.3 From 0f29d52ba932eeed2ae5826c31cbe9a379fd4579 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Fri, 14 Feb 2025 13:47:05 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in 6182930 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/gdpr-obfuscator/pull/1 --- cli.py | 2 ++ obfuscator/csv_reader.py | 4 +--- obfuscator/logger.py | 7 +++++-- test/test_csv_reader.py | 15 ++++++++++----- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/cli.py b/cli.py index 23c9057..652830e 100644 --- a/cli.py +++ b/cli.py @@ -4,6 +4,7 @@ from obfuscator.logger import get_logger logger = get_logger("CLI") + def main(): parser = argparse.ArgumentParser(description="gdpr-obfuscator") # Require user to either choose a local file or an S3 object @@ -20,5 +21,6 @@ def main(): else: logger.debug("User chose to read CSV from S3") + if __name__ == "__main__": main() diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 42e8f8a..5bc91dc 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -4,6 +4,7 @@ from logger import get_logger logger = get_logger("CSVReader") + class CSVReader: def __init__(self, path: str): self.path = path @@ -27,6 +28,3 @@ class CSVReader: def read_s3(self) -> List[Dict[str, str]]: return [] - - - diff --git a/obfuscator/logger.py b/obfuscator/logger.py index 52c4f5e..61a75ec 100644 --- a/obfuscator/logger.py +++ b/obfuscator/logger.py @@ -1,6 +1,7 @@ import logging import os + def get_logger(name: str) -> logging.Logger: logger = logging.getLogger(name) @@ -13,9 +14,11 @@ def get_logger(name: str) -> logging.Logger: logger.setLevel(log_level) handler = logging.StreamHandler() - format = logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s") + format = logging.Formatter( + "%(asctime)s - %(levelname)s - %(name)s - %(message)s" + ) handler.setFormatter(format) logger.addHandler(handler) - return logger \ No newline at end of file + return logger diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index d245d7c..ac43b04 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -4,34 +4,38 @@ from main import csv_reader import pytest + def test_empty_csv_should_return_no_content(): content = "" result = csv_reader(content) expected = [] assert result == expected + def test_csv_with_header_only_should_return_no_content(): content = "student_id,name,course\n" result = csv_reader(content) expected = [] assert result == expected + def test_csv_with_valid_data(): content = ( "student_id,name,course\n" "1234,Student 1,Course 1\n" "5678,Student 2,Course 2\n" - ) + ) result = csv_reader(content) expected = [ {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, - ] - assert result == expected + ] + assert result == expected + def test_csv_with_quoted_fields_should_run_as_expected(): content = ( - 'student_id,name,course\n' + "student_id,name,course\n" '1234,"Student 1","Course 1"\n' '5678,"Student 2","Course 2"\n' ) @@ -42,8 +46,9 @@ def test_csv_with_quoted_fields_should_run_as_expected(): ] assert result == expected + def test_non_csv_file_should_return_no_content(): content = "" result = csv_reader(content) expected = [] - assert result == expected \ No newline at end of file + assert result == expected -- cgit v1.2.3 From afcf7339eec26144e4ab99a3e67e60fb38932960 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 13:57:38 +0000 Subject: add pytest-cov as dev dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index a9cd511..d5db843 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ packages = [ [tool.poetry.group.dev.dependencies] pytest = "8.3.4" +pytest-cov = "^6.0.0" [build-system] requires = ["poetry-core>=2.0.0,<3.0.0"] -- cgit v1.2.3 From bebcd423e7ca5aa620b8ceeef67e77daa3f14f0f Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 13:59:03 +0000 Subject: process path in CSVReader methods & fix import paths --- cli.py | 4 ++-- obfuscator/csv_reader.py | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cli.py b/cli.py index 23c9057..885573d 100644 --- a/cli.py +++ b/cli.py @@ -14,8 +14,8 @@ def main(): if args.local and not args.s3: logger.debug("User chose to read CSV from local path") - reader = CSVReader(args.local) - data = reader.read_local() + reader = CSVReader() + data = reader.read_local(args.local) print(data) else: logger.debug("User chose to read CSV from S3") diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 42e8f8a..901d396 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -1,24 +1,24 @@ import csv from typing import List, Dict -from logger import get_logger +from obfuscator.logger import get_logger logger = get_logger("CSVReader") class CSVReader: - def __init__(self, path: str): - self.path = path + def __init__(self): + pass - def read_local(self) -> List[Dict[str, str]]: - logger.debug(f"Reading local CSV from: {self.path}") + def read_local(self, path) -> List[Dict[str, str]]: + logger.debug(f"Reading local CSV from: {path}") data = [] try: - with open(self.path, mode="r", encoding="utf-8") as file: + with open(path, mode="r", encoding="utf-8") as file: reader = csv.DictReader(file) for row in reader: data.append(dict(row)) except FileNotFoundError: - logger.error(f"File not found: {self.path}") + logger.error(f"File not found: {path}") except Exception as e: logger.error(f"Error reading file: {e}") -- cgit v1.2.3 From ad1d342894d31838c3b1b98eb66c5498c101e8ac Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 14:01:05 +0000 Subject: create CSVReader instance for tests - broken --- test/test_csv_reader.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index d245d7c..0098e0f 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -1,9 +1,13 @@ # csv_reader.py - tests # Author: Alex Schofield -from main import csv_reader +from obfuscator.csv_reader import CSVReader import pytest +reader = CSVReader() + +### TODO : TESTS ARE BROKEN, FIX THEM IN NEXT BRANCH ### + def test_empty_csv_should_return_no_content(): content = "" result = csv_reader(content) -- cgit v1.2.3 From c8df3c6d06dcb2ba6720a17bf3b6db67693c41dd Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Fri, 14 Feb 2025 14:04:18 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in 5082d02 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/gdpr-obfuscator/pull/1 --- test/test_csv_reader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index fb5996d..03c6e4b 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -8,6 +8,7 @@ reader = CSVReader() ### TODO : TESTS ARE BROKEN, FIX THEM IN NEXT BRANCH ### + def test_empty_csv_should_return_no_content(): content = "" result = csv_reader(content) -- cgit v1.2.3 From c03897d446352c461790ab82c7a2bae85db17e86 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 17 Feb 2025 01:06:08 +0000 Subject: add method for reading csv content directly from string --- obfuscator/csv_reader.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index cbd18c1..cf81a30 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -1,4 +1,5 @@ import csv +import io from typing import List, Dict from obfuscator.logger import get_logger @@ -28,3 +29,11 @@ class CSVReader: def read_s3(self) -> List[Dict[str, str]]: return [] + + def read_string(self, content: str) -> List[Dict[str, str]]: + if not content.strip(): + return [] + + f = io.StringIO(content) + reader = csv.DictReader(f) + return [dict(row) for row in reader] -- cgit v1.2.3 From 92a7f1ae67e825eefb9488637added14cd5f7d53 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 17 Feb 2025 01:07:40 +0000 Subject: fix tests for csv_reader by using new read_string method --- test/test_csv_reader.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 03c6e4b..48a808c 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -6,19 +6,16 @@ import pytest reader = CSVReader() -### TODO : TESTS ARE BROKEN, FIX THEM IN NEXT BRANCH ### - - def test_empty_csv_should_return_no_content(): content = "" - result = csv_reader(content) + result = reader.read_string(content) expected = [] assert result == expected def test_csv_with_header_only_should_return_no_content(): content = "student_id,name,course\n" - result = csv_reader(content) + result = reader.read_string(content) expected = [] assert result == expected @@ -29,7 +26,7 @@ def test_csv_with_valid_data(): "1234,Student 1,Course 1\n" "5678,Student 2,Course 2\n" ) - result = csv_reader(content) + result = reader.read_string(content) expected = [ {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, @@ -43,7 +40,7 @@ def test_csv_with_quoted_fields_should_run_as_expected(): '1234,"Student 1","Course 1"\n' '5678,"Student 2","Course 2"\n' ) - result = csv_reader(content) + result = reader.read_string(content) expected = [ {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, @@ -53,6 +50,6 @@ def test_csv_with_quoted_fields_should_run_as_expected(): def test_non_csv_file_should_return_no_content(): content = "" - result = csv_reader(content) + result = reader.read_string(content) expected = [] assert result == expected -- cgit v1.2.3 From 3c0d24558db94e359fbcde89a48526b8b36218e4 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 17 Feb 2025 01:10:17 +0000 Subject: anti-pattern: fix re-definition of format variable in logger.py --- obfuscator/logger.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/obfuscator/logger.py b/obfuscator/logger.py index 61a75ec..ca41e95 100644 --- a/obfuscator/logger.py +++ b/obfuscator/logger.py @@ -14,10 +14,10 @@ def get_logger(name: str) -> logging.Logger: logger.setLevel(log_level) handler = logging.StreamHandler() - format = logging.Formatter( + formatting = logging.Formatter( "%(asctime)s - %(levelname)s - %(name)s - %(message)s" ) - handler.setFormatter(format) + handler.setFormatter(formatting) logger.addHandler(handler) -- cgit v1.2.3 From fd7598acd7e33782090d7f866fa51c167e2190c8 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 17 Feb 2025 01:12:00 +0000 Subject: performance: add @staticmethod to CSVReader methods to save memory --- obfuscator/csv_reader.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index cf81a30..1fb1e30 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -9,8 +9,9 @@ logger = get_logger("CSVReader") class CSVReader: def __init__(self): pass - - def read_local(self, path) -> List[Dict[str, str]]: + + @staticmethod + def read_local(path) -> List[Dict[str, str]]: logger.debug(f"Reading local CSV from: {path}") data = [] @@ -26,10 +27,12 @@ class CSVReader: logger.debug(f"Total rows read: {len(data)}") return data - - def read_s3(self) -> List[Dict[str, str]]: - return [] + @staticmethod + def read_s3(path) -> List[Dict[str, str]]: + return [] + + @staticmethod def read_string(self, content: str) -> List[Dict[str, str]]: if not content.strip(): return [] -- cgit v1.2.3 From d1c0d349ed666ef3959c00cfb30dc5ad5e786e2c Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 17 Feb 2025 01:14:25 +0000 Subject: anti-pattern: remove unused __init__ for CSVReader --- obfuscator/csv_reader.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 1fb1e30..55fb892 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -7,9 +7,6 @@ logger = get_logger("CSVReader") class CSVReader: - def __init__(self): - pass - @staticmethod def read_local(path) -> List[Dict[str, str]]: logger.debug(f"Reading local CSV from: {path}") @@ -33,7 +30,7 @@ class CSVReader: return [] @staticmethod - def read_string(self, content: str) -> List[Dict[str, str]]: + def read_string(content: str) -> List[Dict[str, str]]: if not content.strip(): return [] -- cgit v1.2.3 From f612f71ef5d09ce93526b4268173c612f06ae701 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 17 Feb 2025 01:17:06 +0000 Subject: use list comprehension when returning output from read_local --- obfuscator/csv_reader.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 55fb892..3b4496b 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -5,25 +5,19 @@ from obfuscator.logger import get_logger logger = get_logger("CSVReader") - class CSVReader: @staticmethod def read_local(path) -> List[Dict[str, str]]: logger.debug(f"Reading local CSV from: {path}") - data = [] try: - with open(path, mode="r", encoding="utf-8") as file: - reader = csv.DictReader(file) - for row in reader: - data.append(dict(row)) + with open(path, mode="r", encoding="utf-8") as f: + reader = csv.DictReader(f) + return [dict(row) for row in reader] except FileNotFoundError: logger.error(f"File not found: {path}") except Exception as e: logger.error(f"Error reading file: {e}") - - logger.debug(f"Total rows read: {len(data)}") - return data @staticmethod def read_s3(path) -> List[Dict[str, str]]: -- cgit v1.2.3 From cf1376862fb2f58c2e837338ed9c765439ffa1b9 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Mon, 17 Feb 2025 01:17:37 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in f612f71 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/gdpr-obfuscator/pull/1 --- obfuscator/csv_reader.py | 5 +++-- test/test_csv_reader.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 3b4496b..b9dccdb 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -5,6 +5,7 @@ from obfuscator.logger import get_logger logger = get_logger("CSVReader") + class CSVReader: @staticmethod def read_local(path) -> List[Dict[str, str]]: @@ -18,7 +19,7 @@ class CSVReader: logger.error(f"File not found: {path}") except Exception as e: logger.error(f"Error reading file: {e}") - + @staticmethod def read_s3(path) -> List[Dict[str, str]]: return [] @@ -27,7 +28,7 @@ class CSVReader: def read_string(content: str) -> List[Dict[str, str]]: if not content.strip(): return [] - + f = io.StringIO(content) reader = csv.DictReader(f) return [dict(row) for row in reader] diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 48a808c..e62c093 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -6,6 +6,7 @@ import pytest reader = CSVReader() + def test_empty_csv_should_return_no_content(): content = "" result = reader.read_string(content) -- cgit v1.2.3