From ff0ece8fb3f0cf247af441fe67f1ba16e9db5071 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 12 Feb 2025 02:00:04 +0000 Subject: add initial cli script for obfuscator --- cli.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 cli.py (limited to 'cli.py') diff --git a/cli.py b/cli.py new file mode 100644 index 0000000..45fc2f1 --- /dev/null +++ b/cli.py @@ -0,0 +1,11 @@ +import argparse +from obfuscator import csv_reader + +def main(): + parser = argparse.ArgumentParser(description="gdpr-obfuscator") + parser.add_argument("--local", help="Path to local CSV file") + parser.add_argument("--s3", help="Path to S3 object for CSV file") + args = parser.parse_args() + +if __name__ == "__main__": + main() -- cgit v1.2.3 From 6aa8722c071bcdf87ab22bb23d561ff65ca251f6 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 12:12:44 +0000 Subject: rename main csv reader function in cli.py & csv_reader.py --- cli.py | 2 +- obfuscator/csv_reader.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'cli.py') diff --git a/cli.py b/cli.py index 45fc2f1..1911f5c 100644 --- a/cli.py +++ b/cli.py @@ -1,5 +1,5 @@ import argparse -from obfuscator import csv_reader +from obfuscator.csv_reader import read def main(): parser = argparse.ArgumentParser(description="gdpr-obfuscator") diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 8ee40e9..8a9be9b 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -11,7 +11,7 @@ if not logger.handlers: handler.setFormatter(formatter) logger.addHandler(handler) -def csv_reader(content: str) -> List[Dict[str, str]]: +def read(content: str) -> List[Dict[str, str]]: f = StringIO(content) reader = csv.DictReader(f) logger.info("Finished reading CSV!") -- cgit v1.2.3 From a16fd48f1e87e9b101250aba532ec81de813e260 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 12:24:40 +0000 Subject: require only --local or --s3 to be chosen by user in cli.py --- cli.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'cli.py') diff --git a/cli.py b/cli.py index 1911f5c..81114d2 100644 --- a/cli.py +++ b/cli.py @@ -3,9 +3,16 @@ from obfuscator.csv_reader import read def main(): parser = argparse.ArgumentParser(description="gdpr-obfuscator") - parser.add_argument("--local", help="Path to local CSV file") - parser.add_argument("--s3", help="Path to S3 object for CSV file") + # Require user to either choose a local file or an S3 object + loc = parser.add_mutually_exclusive_group(required=True) + loc.add_argument("--local") + loc.add_argument("--s3") args = parser.parse_args() + if args.local and not args.s3: + print(read(args.local)) + else: + pass + if __name__ == "__main__": main() -- cgit v1.2.3 From 6ad17315d1d6a174b9ba7c597947e211ee87f40f Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 12:25:41 +0000 Subject: add separate functions for reading locally and from s3 bucket --- cli.py | 2 +- obfuscator/csv_reader.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'cli.py') diff --git a/cli.py b/cli.py index 81114d2..0bbaf89 100644 --- a/cli.py +++ b/cli.py @@ -1,5 +1,5 @@ import argparse -from obfuscator.csv_reader import read +from obfuscator.csv_reader import read_local, read_s3 def main(): parser = argparse.ArgumentParser(description="gdpr-obfuscator") diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 8a9be9b..4ae8a06 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -11,10 +11,13 @@ if not logger.handlers: handler.setFormatter(formatter) logger.addHandler(handler) -def read(content: str) -> List[Dict[str, str]]: +def read_local(content: str) -> List[Dict[str, str]]: f = StringIO(content) reader = csv.DictReader(f) logger.info("Finished reading CSV!") return list(reader) +def read_s3(): + pass + -- cgit v1.2.3 From 02478f2e48302441f86f3eeaba80119d6bc7ccf1 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 12:59:30 +0000 Subject: update cli.py to properly read from local csv files --- cli.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'cli.py') diff --git a/cli.py b/cli.py index 0bbaf89..bb12421 100644 --- a/cli.py +++ b/cli.py @@ -1,5 +1,5 @@ import argparse -from obfuscator.csv_reader import read_local, read_s3 +from obfuscator.csv_reader import CSVReader def main(): parser = argparse.ArgumentParser(description="gdpr-obfuscator") @@ -10,7 +10,9 @@ def main(): args = parser.parse_args() if args.local and not args.s3: - print(read(args.local)) + reader = CSVReader(args.local) + data = reader.read_local() + print(data) else: pass -- cgit v1.2.3 From 6182930c3ea53932c6153dd101264cb90c90f979 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 13:44:57 +0000 Subject: add universal logging for debugging --- cli.py | 6 +++++- obfuscator/csv_reader.py | 11 ++--------- obfuscator/logger.py | 21 +++++++++++++++++++++ 3 files changed, 28 insertions(+), 10 deletions(-) create mode 100644 obfuscator/logger.py (limited to 'cli.py') diff --git a/cli.py b/cli.py index bb12421..23c9057 100644 --- a/cli.py +++ b/cli.py @@ -1,5 +1,8 @@ import argparse from obfuscator.csv_reader import CSVReader +from obfuscator.logger import get_logger + +logger = get_logger("CLI") def main(): parser = argparse.ArgumentParser(description="gdpr-obfuscator") @@ -10,11 +13,12 @@ def main(): args = parser.parse_args() if args.local and not args.s3: + logger.debug("User chose to read CSV from local path") reader = CSVReader(args.local) data = reader.read_local() print(data) else: - pass + logger.debug("User chose to read CSV from S3") if __name__ == "__main__": main() diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index b365d45..42e8f8a 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -1,15 +1,8 @@ import csv from typing import List, Dict -import logging - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -if not logger.handlers: - handler = logging.StreamHandler() - formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') - handler.setFormatter(formatter) - logger.addHandler(handler) +from logger import get_logger +logger = get_logger("CSVReader") class CSVReader: def __init__(self, path: str): diff --git a/obfuscator/logger.py b/obfuscator/logger.py new file mode 100644 index 0000000..52c4f5e --- /dev/null +++ b/obfuscator/logger.py @@ -0,0 +1,21 @@ +import logging +import os + +def get_logger(name: str) -> logging.Logger: + logger = logging.getLogger(name) + + if not logger.hasHandlers(): + if os.getenv("DEBUG", "FALSE").upper() == "TRUE": + log_level = logging.DEBUG + else: + log_level = logging.INFO + + logger.setLevel(log_level) + + handler = logging.StreamHandler() + format = logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s") + handler.setFormatter(format) + + logger.addHandler(handler) + + return logger \ No newline at end of file -- cgit v1.2.3 From 0f29d52ba932eeed2ae5826c31cbe9a379fd4579 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Fri, 14 Feb 2025 13:47:05 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in 6182930 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/gdpr-obfuscator/pull/1 --- cli.py | 2 ++ obfuscator/csv_reader.py | 4 +--- obfuscator/logger.py | 7 +++++-- test/test_csv_reader.py | 15 ++++++++++----- 4 files changed, 18 insertions(+), 10 deletions(-) (limited to 'cli.py') diff --git a/cli.py b/cli.py index 23c9057..652830e 100644 --- a/cli.py +++ b/cli.py @@ -4,6 +4,7 @@ from obfuscator.logger import get_logger logger = get_logger("CLI") + def main(): parser = argparse.ArgumentParser(description="gdpr-obfuscator") # Require user to either choose a local file or an S3 object @@ -20,5 +21,6 @@ def main(): else: logger.debug("User chose to read CSV from S3") + if __name__ == "__main__": main() diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 42e8f8a..5bc91dc 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -4,6 +4,7 @@ from logger import get_logger logger = get_logger("CSVReader") + class CSVReader: def __init__(self, path: str): self.path = path @@ -27,6 +28,3 @@ class CSVReader: def read_s3(self) -> List[Dict[str, str]]: return [] - - - diff --git a/obfuscator/logger.py b/obfuscator/logger.py index 52c4f5e..61a75ec 100644 --- a/obfuscator/logger.py +++ b/obfuscator/logger.py @@ -1,6 +1,7 @@ import logging import os + def get_logger(name: str) -> logging.Logger: logger = logging.getLogger(name) @@ -13,9 +14,11 @@ def get_logger(name: str) -> logging.Logger: logger.setLevel(log_level) handler = logging.StreamHandler() - format = logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s") + format = logging.Formatter( + "%(asctime)s - %(levelname)s - %(name)s - %(message)s" + ) handler.setFormatter(format) logger.addHandler(handler) - return logger \ No newline at end of file + return logger diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index d245d7c..ac43b04 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -4,34 +4,38 @@ from main import csv_reader import pytest + def test_empty_csv_should_return_no_content(): content = "" result = csv_reader(content) expected = [] assert result == expected + def test_csv_with_header_only_should_return_no_content(): content = "student_id,name,course\n" result = csv_reader(content) expected = [] assert result == expected + def test_csv_with_valid_data(): content = ( "student_id,name,course\n" "1234,Student 1,Course 1\n" "5678,Student 2,Course 2\n" - ) + ) result = csv_reader(content) expected = [ {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, - ] - assert result == expected + ] + assert result == expected + def test_csv_with_quoted_fields_should_run_as_expected(): content = ( - 'student_id,name,course\n' + "student_id,name,course\n" '1234,"Student 1","Course 1"\n' '5678,"Student 2","Course 2"\n' ) @@ -42,8 +46,9 @@ def test_csv_with_quoted_fields_should_run_as_expected(): ] assert result == expected + def test_non_csv_file_should_return_no_content(): content = "" result = csv_reader(content) expected = [] - assert result == expected \ No newline at end of file + assert result == expected -- cgit v1.2.3 From bebcd423e7ca5aa620b8ceeef67e77daa3f14f0f Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 14 Feb 2025 13:59:03 +0000 Subject: process path in CSVReader methods & fix import paths --- cli.py | 4 ++-- obfuscator/csv_reader.py | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'cli.py') diff --git a/cli.py b/cli.py index 23c9057..885573d 100644 --- a/cli.py +++ b/cli.py @@ -14,8 +14,8 @@ def main(): if args.local and not args.s3: logger.debug("User chose to read CSV from local path") - reader = CSVReader(args.local) - data = reader.read_local() + reader = CSVReader() + data = reader.read_local(args.local) print(data) else: logger.debug("User chose to read CSV from S3") diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 42e8f8a..901d396 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -1,24 +1,24 @@ import csv from typing import List, Dict -from logger import get_logger +from obfuscator.logger import get_logger logger = get_logger("CSVReader") class CSVReader: - def __init__(self, path: str): - self.path = path + def __init__(self): + pass - def read_local(self) -> List[Dict[str, str]]: - logger.debug(f"Reading local CSV from: {self.path}") + def read_local(self, path) -> List[Dict[str, str]]: + logger.debug(f"Reading local CSV from: {path}") data = [] try: - with open(self.path, mode="r", encoding="utf-8") as file: + with open(path, mode="r", encoding="utf-8") as file: reader = csv.DictReader(file) for row in reader: data.append(dict(row)) except FileNotFoundError: - logger.error(f"File not found: {self.path}") + logger.error(f"File not found: {path}") except Exception as e: logger.error(f"Error reading file: {e}") -- cgit v1.2.3