From 5e125e6c561c81526c2f4c4c7895fe720ba64d90 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 12 Feb 2025 01:56:14 +0000 Subject: re-organise project structure & update pyproject.toml --- obfuscator/__init__.py | 0 obfuscator/csv_reader.py | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 obfuscator/__init__.py create mode 100644 obfuscator/csv_reader.py (limited to 'obfuscator') diff --git a/obfuscator/__init__.py b/obfuscator/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py new file mode 100644 index 0000000..8ee40e9 --- /dev/null +++ b/obfuscator/csv_reader.py @@ -0,0 +1,20 @@ +import csv +from io import StringIO +from typing import List, Dict +import logging + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +if not logger.handlers: + handler = logging.StreamHandler() + formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') + handler.setFormatter(formatter) + logger.addHandler(handler) + +def csv_reader(content: str) -> List[Dict[str, str]]: + f = StringIO(content) + reader = csv.DictReader(f) + logger.info("Finished reading CSV!") + return list(reader) + + -- cgit v1.2.3 From 6aa8722c071bcdf87ab22bb23d561ff65ca251f6 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 12:12:44 +0000 Subject: rename main csv reader function in cli.py & csv_reader.py --- cli.py | 2 +- obfuscator/csv_reader.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'obfuscator') diff --git a/cli.py b/cli.py index 45fc2f1..1911f5c 100644 --- a/cli.py +++ b/cli.py @@ -1,5 +1,5 @@ import argparse -from obfuscator import csv_reader +from obfuscator.csv_reader import read def main(): parser = argparse.ArgumentParser(description="gdpr-obfuscator") diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 8ee40e9..8a9be9b 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -11,7 +11,7 @@ if not logger.handlers: handler.setFormatter(formatter) logger.addHandler(handler) -def csv_reader(content: str) -> List[Dict[str, str]]: +def read(content: str) -> List[Dict[str, str]]: f = StringIO(content) reader = csv.DictReader(f) logger.info("Finished reading CSV!") -- cgit v1.2.3 From 6ad17315d1d6a174b9ba7c597947e211ee87f40f Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 12:25:41 +0000 Subject: add separate functions for reading locally and from s3 bucket --- cli.py | 2 +- obfuscator/csv_reader.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'obfuscator') diff --git a/cli.py b/cli.py index 81114d2..0bbaf89 100644 --- a/cli.py +++ b/cli.py @@ -1,5 +1,5 @@ import argparse -from obfuscator.csv_reader import read +from obfuscator.csv_reader import read_local, read_s3 def main(): parser = argparse.ArgumentParser(description="gdpr-obfuscator") diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 8a9be9b..4ae8a06 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -11,10 +11,13 @@ if not logger.handlers: handler.setFormatter(formatter) logger.addHandler(handler) -def read(content: str) -> List[Dict[str, str]]: +def read_local(content: str) -> List[Dict[str, str]]: f = StringIO(content) reader = csv.DictReader(f) logger.info("Finished reading CSV!") return list(reader) +def read_s3(): + pass + -- cgit v1.2.3 From 9bc5d21ae7376792c6a4813e1f1ef16bfb42ec37 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 12:32:27 +0000 Subject: encapsulate csv reading logic into class & methods --- obfuscator/csv_reader.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'obfuscator') diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 4ae8a06..b365d45 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -1,5 +1,4 @@ import csv -from io import StringIO from typing import List, Dict import logging @@ -11,13 +10,30 @@ if not logger.handlers: handler.setFormatter(formatter) logger.addHandler(handler) -def read_local(content: str) -> List[Dict[str, str]]: - f = StringIO(content) - reader = csv.DictReader(f) - logger.info("Finished reading CSV!") - return list(reader) -def read_s3(): - pass +class CSVReader: + def __init__(self, path: str): + self.path = path + + def read_local(self) -> List[Dict[str, str]]: + logger.debug(f"Reading local CSV from: {self.path}") + data = [] + + try: + with open(self.path, mode="r", encoding="utf-8") as file: + reader = csv.DictReader(file) + for row in reader: + data.append(dict(row)) + except FileNotFoundError: + logger.error(f"File not found: {self.path}") + except Exception as e: + logger.error(f"Error reading file: {e}") + + logger.debug(f"Total rows read: {len(data)}") + return data + + def read_s3(self) -> List[Dict[str, str]]: + return [] + -- cgit v1.2.3 From 6182930c3ea53932c6153dd101264cb90c90f979 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 13:44:57 +0000 Subject: add universal logging for debugging --- cli.py | 6 +++++- obfuscator/csv_reader.py | 11 ++--------- obfuscator/logger.py | 21 +++++++++++++++++++++ 3 files changed, 28 insertions(+), 10 deletions(-) create mode 100644 obfuscator/logger.py (limited to 'obfuscator') diff --git a/cli.py b/cli.py index bb12421..23c9057 100644 --- a/cli.py +++ b/cli.py @@ -1,5 +1,8 @@ import argparse from obfuscator.csv_reader import CSVReader +from obfuscator.logger import get_logger + +logger = get_logger("CLI") def main(): parser = argparse.ArgumentParser(description="gdpr-obfuscator") @@ -10,11 +13,12 @@ def main(): args = parser.parse_args() if args.local and not args.s3: + logger.debug("User chose to read CSV from local path") reader = CSVReader(args.local) data = reader.read_local() print(data) else: - pass + logger.debug("User chose to read CSV from S3") if __name__ == "__main__": main() diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index b365d45..42e8f8a 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -1,15 +1,8 @@ import csv from typing import List, Dict -import logging - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -if not logger.handlers: - handler = logging.StreamHandler() - formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') - handler.setFormatter(formatter) - logger.addHandler(handler) +from logger import get_logger +logger = get_logger("CSVReader") class CSVReader: def __init__(self, path: str): diff --git a/obfuscator/logger.py b/obfuscator/logger.py new file mode 100644 index 0000000..52c4f5e --- /dev/null +++ b/obfuscator/logger.py @@ -0,0 +1,21 @@ +import logging +import os + +def get_logger(name: str) -> logging.Logger: + logger = logging.getLogger(name) + + if not logger.hasHandlers(): + if os.getenv("DEBUG", "FALSE").upper() == "TRUE": + log_level = logging.DEBUG + else: + log_level = logging.INFO + + logger.setLevel(log_level) + + handler = logging.StreamHandler() + format = logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s") + handler.setFormatter(format) + + logger.addHandler(handler) + + return logger \ No newline at end of file -- cgit v1.2.3 From 0f29d52ba932eeed2ae5826c31cbe9a379fd4579 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Fri, 14 Feb 2025 13:47:05 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in 6182930 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/gdpr-obfuscator/pull/1 --- cli.py | 2 ++ obfuscator/csv_reader.py | 4 +--- obfuscator/logger.py | 7 +++++-- test/test_csv_reader.py | 15 ++++++++++----- 4 files changed, 18 insertions(+), 10 deletions(-) (limited to 'obfuscator') diff --git a/cli.py b/cli.py index 23c9057..652830e 100644 --- a/cli.py +++ b/cli.py @@ -4,6 +4,7 @@ from obfuscator.logger import get_logger logger = get_logger("CLI") + def main(): parser = argparse.ArgumentParser(description="gdpr-obfuscator") # Require user to either choose a local file or an S3 object @@ -20,5 +21,6 @@ def main(): else: logger.debug("User chose to read CSV from S3") + if __name__ == "__main__": main() diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 42e8f8a..5bc91dc 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -4,6 +4,7 @@ from logger import get_logger logger = get_logger("CSVReader") + class CSVReader: def __init__(self, path: str): self.path = path @@ -27,6 +28,3 @@ class CSVReader: def read_s3(self) -> List[Dict[str, str]]: return [] - - - diff --git a/obfuscator/logger.py b/obfuscator/logger.py index 52c4f5e..61a75ec 100644 --- a/obfuscator/logger.py +++ b/obfuscator/logger.py @@ -1,6 +1,7 @@ import logging import os + def get_logger(name: str) -> logging.Logger: logger = logging.getLogger(name) @@ -13,9 +14,11 @@ def get_logger(name: str) -> logging.Logger: logger.setLevel(log_level) handler = logging.StreamHandler() - format = logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s") + format = logging.Formatter( + "%(asctime)s - %(levelname)s - %(name)s - %(message)s" + ) handler.setFormatter(format) logger.addHandler(handler) - return logger \ No newline at end of file + return logger diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index d245d7c..ac43b04 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -4,34 +4,38 @@ from main import csv_reader import pytest + def test_empty_csv_should_return_no_content(): content = "" result = csv_reader(content) expected = [] assert result == expected + def test_csv_with_header_only_should_return_no_content(): content = "student_id,name,course\n" result = csv_reader(content) expected = [] assert result == expected + def test_csv_with_valid_data(): content = ( "student_id,name,course\n" "1234,Student 1,Course 1\n" "5678,Student 2,Course 2\n" - ) + ) result = csv_reader(content) expected = [ {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, - ] - assert result == expected + ] + assert result == expected + def test_csv_with_quoted_fields_should_run_as_expected(): content = ( - 'student_id,name,course\n' + "student_id,name,course\n" '1234,"Student 1","Course 1"\n' '5678,"Student 2","Course 2"\n' ) @@ -42,8 +46,9 @@ def test_csv_with_quoted_fields_should_run_as_expected(): ] assert result == expected + def test_non_csv_file_should_return_no_content(): content = "" result = csv_reader(content) expected = [] - assert result == expected \ No newline at end of file + assert result == expected -- cgit v1.2.3 From bebcd423e7ca5aa620b8ceeef67e77daa3f14f0f Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 13:59:03 +0000 Subject: process path in CSVReader methods & fix import paths --- cli.py | 4 ++-- obfuscator/csv_reader.py | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'obfuscator') diff --git a/cli.py b/cli.py index 23c9057..885573d 100644 --- a/cli.py +++ b/cli.py @@ -14,8 +14,8 @@ def main(): if args.local and not args.s3: logger.debug("User chose to read CSV from local path") - reader = CSVReader(args.local) - data = reader.read_local() + reader = CSVReader() + data = reader.read_local(args.local) print(data) else: logger.debug("User chose to read CSV from S3") diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 42e8f8a..901d396 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -1,24 +1,24 @@ import csv from typing import List, Dict -from logger import get_logger +from obfuscator.logger import get_logger logger = get_logger("CSVReader") class CSVReader: - def __init__(self, path: str): - self.path = path + def __init__(self): + pass - def read_local(self) -> List[Dict[str, str]]: - logger.debug(f"Reading local CSV from: {self.path}") + def read_local(self, path) -> List[Dict[str, str]]: + logger.debug(f"Reading local CSV from: {path}") data = [] try: - with open(self.path, mode="r", encoding="utf-8") as file: + with open(path, mode="r", encoding="utf-8") as file: reader = csv.DictReader(file) for row in reader: data.append(dict(row)) except FileNotFoundError: - logger.error(f"File not found: {self.path}") + logger.error(f"File not found: {path}") except Exception as e: logger.error(f"Error reading file: {e}") -- cgit v1.2.3 From c03897d446352c461790ab82c7a2bae85db17e86 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 17 Feb 2025 01:06:08 +0000 Subject: add method for reading csv content directly from string --- obfuscator/csv_reader.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'obfuscator') diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index cbd18c1..cf81a30 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -1,4 +1,5 @@ import csv +import io from typing import List, Dict from obfuscator.logger import get_logger @@ -28,3 +29,11 @@ class CSVReader: def read_s3(self) -> List[Dict[str, str]]: return [] + + def read_string(self, content: str) -> List[Dict[str, str]]: + if not content.strip(): + return [] + + f = io.StringIO(content) + reader = csv.DictReader(f) + return [dict(row) for row in reader] -- cgit v1.2.3 From 3c0d24558db94e359fbcde89a48526b8b36218e4 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 17 Feb 2025 01:10:17 +0000 Subject: anti-pattern: fix re-definition of format variable in logger.py --- obfuscator/logger.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'obfuscator') diff --git a/obfuscator/logger.py b/obfuscator/logger.py index 61a75ec..ca41e95 100644 --- a/obfuscator/logger.py +++ b/obfuscator/logger.py @@ -14,10 +14,10 @@ def get_logger(name: str) -> logging.Logger: logger.setLevel(log_level) handler = logging.StreamHandler() - format = logging.Formatter( + formatting = logging.Formatter( "%(asctime)s - %(levelname)s - %(name)s - %(message)s" ) - handler.setFormatter(format) + handler.setFormatter(formatting) logger.addHandler(handler) -- cgit v1.2.3 From fd7598acd7e33782090d7f866fa51c167e2190c8 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 17 Feb 2025 01:12:00 +0000 Subject: performance: add @staticmethod to CSVReader methods to save memory --- obfuscator/csv_reader.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'obfuscator') diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index cf81a30..1fb1e30 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -9,8 +9,9 @@ logger = get_logger("CSVReader") class CSVReader: def __init__(self): pass - - def read_local(self, path) -> List[Dict[str, str]]: + + @staticmethod + def read_local(path) -> List[Dict[str, str]]: logger.debug(f"Reading local CSV from: {path}") data = [] @@ -26,10 +27,12 @@ class CSVReader: logger.debug(f"Total rows read: {len(data)}") return data - - def read_s3(self) -> List[Dict[str, str]]: - return [] + @staticmethod + def read_s3(path) -> List[Dict[str, str]]: + return [] + + @staticmethod def read_string(self, content: str) -> List[Dict[str, str]]: if not content.strip(): return [] -- cgit v1.2.3 From d1c0d349ed666ef3959c00cfb30dc5ad5e786e2c Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 17 Feb 2025 01:14:25 +0000 Subject: anti-pattern: remove unused __init__ for CSVReader --- obfuscator/csv_reader.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'obfuscator') diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 1fb1e30..55fb892 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -7,9 +7,6 @@ logger = get_logger("CSVReader") class CSVReader: - def __init__(self): - pass - @staticmethod def read_local(path) -> List[Dict[str, str]]: logger.debug(f"Reading local CSV from: {path}") @@ -33,7 +30,7 @@ class CSVReader: return [] @staticmethod - def read_string(self, content: str) -> List[Dict[str, str]]: + def read_string(content: str) -> List[Dict[str, str]]: if not content.strip(): return [] -- cgit v1.2.3 From f612f71ef5d09ce93526b4268173c612f06ae701 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 17 Feb 2025 01:17:06 +0000 Subject: use list comprehension when returning output from read_local --- obfuscator/csv_reader.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'obfuscator') diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 55fb892..3b4496b 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -5,25 +5,19 @@ from obfuscator.logger import get_logger logger = get_logger("CSVReader") - class CSVReader: @staticmethod def read_local(path) -> List[Dict[str, str]]: logger.debug(f"Reading local CSV from: {path}") - data = [] try: - with open(path, mode="r", encoding="utf-8") as file: - reader = csv.DictReader(file) - for row in reader: - data.append(dict(row)) + with open(path, mode="r", encoding="utf-8") as f: + reader = csv.DictReader(f) + return [dict(row) for row in reader] except FileNotFoundError: logger.error(f"File not found: {path}") except Exception as e: logger.error(f"Error reading file: {e}") - - logger.debug(f"Total rows read: {len(data)}") - return data @staticmethod def read_s3(path) -> List[Dict[str, str]]: -- cgit v1.2.3 From cf1376862fb2f58c2e837338ed9c765439ffa1b9 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Mon, 17 Feb 2025 01:17:37 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in f612f71 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/gdpr-obfuscator/pull/1 --- obfuscator/csv_reader.py | 5 +++-- test/test_csv_reader.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'obfuscator') diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 3b4496b..b9dccdb 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -5,6 +5,7 @@ from obfuscator.logger import get_logger logger = get_logger("CSVReader") + class CSVReader: @staticmethod def read_local(path) -> List[Dict[str, str]]: @@ -18,7 +19,7 @@ class CSVReader: logger.error(f"File not found: {path}") except Exception as e: logger.error(f"Error reading file: {e}") - + @staticmethod def read_s3(path) -> List[Dict[str, str]]: return [] @@ -27,7 +28,7 @@ class CSVReader: def read_string(content: str) -> List[Dict[str, str]]: if not content.strip(): return [] - + f = io.StringIO(content) reader = csv.DictReader(f) return [dict(row) for row in reader] diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 48a808c..e62c093 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -6,6 +6,7 @@ import pytest reader = CSVReader() + def test_empty_csv_should_return_no_content(): content = "" result = reader.read_string(content) -- cgit v1.2.3