From de33c0c98201a275244a71826d11bb8ee3a12245 Mon Sep 17 00:00:00 2001
From: Alex Schofield
Date: Mon, 17 Feb 2025 14:52:48 +0000
Subject: add comments to csv_reader.py to explain code

---
 obfuscator/csv_reader.py | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

(limited to 'obfuscator')

diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py
index b9dccdb..23a34fc 100644
--- a/obfuscator/csv_reader.py
+++ b/obfuscator/csv_reader.py
@@ -3,32 +3,61 @@ import io
 from typing import List, Dict
 from obfuscator.logger import get_logger
 
+# Create the logger
 logger = get_logger("CSVReader")
 
-
+# Putting the CSV reading components into a class may seem like overkill
+# for a simple script, but it allows for better organization and scalability.
+# @staticmethod is used to define the method without an instance of the class
+# being required. The methods could be defined just as functions, and this
+# may still be changed.
 class CSVReader:
+    """
+    A class to read CSV data from a local file, S3 object, or string. Near
+    the project completion, support for JSON/Parquet files will be added.
+    """
     @staticmethod
     def read_local(path) -> List[Dict[str, str]]:
+        """
+        A method to read a local CSV file and return the data as a list of
+        dictionaries. 
+        """
+        # Log the path of the file being read for debugging
         logger.debug(f"Reading local CSV from: {path}")
-
+        
+        # Attempt to read the file and return the data as a list of dictionaries
+        # However, if the file isn't found or there is a generic exception, log
+        # the error and raise an exception
         try:
             with open(path, mode="r", encoding="utf-8") as f:
                 reader = csv.DictReader(f)
                 return [dict(row) for row in reader]
         except FileNotFoundError:
             logger.error(f"File not found: {path}")
+            raise
         except Exception as e:
             logger.error(f"Error reading file: {e}")
 
     @staticmethod
     def read_s3(path) -> List[Dict[str, str]]:
+        """
+        A method to read an S3 object containing CSV data
+        and return the data as a list of dictionaries.
+        """
+        # Yet to be implemented.
         return []
 
     @staticmethod
     def read_string(content: str) -> List[Dict[str, str]]:
+        """
+        A method to read CSV data from a string and return the data as a list
+        of dictionaries.
+        """
+        # If the content is empty, return an empty list
         if not content.strip():
             return []
 
+        # Treat the string as a file-like object and return as list of dictionaries
         f = io.StringIO(content)
         reader = csv.DictReader(f)
         return [dict(row) for row in reader]
-- 
cgit v1.2.3


From 3837fb40c1f70fa8bfd65872cc1c85963903fe3a Mon Sep 17 00:00:00 2001
From: Alex Schofield
Date: Mon, 17 Feb 2025 14:55:53 +0000
Subject: add comments to obfuscate.py to explain code

---
 obfuscator/obfuscate.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'obfuscator')

diff --git a/obfuscator/obfuscate.py b/obfuscator/obfuscate.py
index ac0bd21..3da9155 100644
--- a/obfuscator/obfuscate.py
+++ b/obfuscator/obfuscate.py
@@ -1,16 +1,24 @@
 from typing import List, Dict
 from obfuscator.logger import get_logger
 
+# Create the logger
 logger = get_logger("Obfuscator")
 
-
 def obfuscate(
     data: List[Dict[str, str]], pii_fields: List[str]
 ) -> List[Dict[str, str]]:
+    """
+    A function to obfuscate PII fields in a list of dictionaries, replacing
+    sensitive values with a string of asterisks.
+    """
+    # If no data is provided, log a message and return an empty list
     if not data:
         logger.info("No valid data was provided to obfuscate")
         return []
 
+    # Obfuscate the PII fields in each record using a list/dict comprehension
+    # This code is good but makes debugging a bit tricky. I may consider
+    # breaking it down into a for loop.
     return [
         {k: ("***" if k in pii_fields else v) for k, v in record.items()}
         for record in data
-- 
cgit v1.2.3