about summary refs log tree commit diff stats
path: root/obfuscator
diff options
context:
space:
mode:
Diffstat (limited to 'obfuscator')
-rw-r--r-- obfuscator/csv_reader.py 33
-rw-r--r-- obfuscator/obfuscate.py 10
2 files changed, 40 insertions, 3 deletions
diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py
index b9dccdb..23a34fc 100644
--- a/obfuscator/csv_reader.py
+++ b/obfuscator/csv_reader.py
@@ -3,32 +3,61 @@ import io
from typing import List, Dict
from obfuscator.logger import get_logger
+# Create the logger
logger = get_logger("CSVReader")
-
+# Putting the CSV reading components into a class may seem like overkill
+# for a simple script, but it allows for better organization and scalability.
+# @staticmethod lets each method be called without creating an instance of
+# the class. The methods could equally be plain module-level functions, and
+# this design may still change.
class CSVReader:
+ """
+ A class to read CSV data from a local file, S3 object, or string.
+ Support for JSON/Parquet files is planned for later in the project.
+ """
@staticmethod
def read_local(path) -> List[Dict[str, str]]:
+ """
+ A method to read a local CSV file and return the data as a list of
+ dictionaries.
+ """
+ # Log the path of the file being read for debugging
logger.debug(f"Reading local CSV from: {path}")
-
+
+ # Attempt to read the file and return the data as a list of dictionaries.
+ # A missing file is logged and re-raised; any other exception is only
+ # logged (not re-raised), so the method returns None in that case.
try:
with open(path, mode="r", encoding="utf-8") as f:
reader = csv.DictReader(f)
return [dict(row) for row in reader]
except FileNotFoundError:
logger.error(f"File not found: {path}")
+ raise
except Exception as e:
logger.error(f"Error reading file: {e}")
@staticmethod
def read_s3(path) -> List[Dict[str, str]]:
+ """
+ A method intended to read an S3 object containing CSV data
+ and return the data as a list of dictionaries.
+ """
+ # Not yet implemented: this stub always returns an empty list.
return []
@staticmethod
def read_string(content: str) -> List[Dict[str, str]]:
+ """
+ A method to read CSV data from a string and return the data as a list
+ of dictionaries.
+ """
+ # If the content is empty, return an empty list
if not content.strip():
return []
+ # Treat the string as a file-like object and return as list of dictionaries
f = io.StringIO(content)
reader = csv.DictReader(f)
return [dict(row) for row in reader]
diff --git a/obfuscator/obfuscate.py b/obfuscator/obfuscate.py
index ac0bd21..3da9155 100644
--- a/obfuscator/obfuscate.py
+++ b/obfuscator/obfuscate.py
@@ -1,16 +1,24 @@
from typing import List, Dict
from obfuscator.logger import get_logger
+# Create the logger
logger = get_logger("Obfuscator")
-
def obfuscate(
data: List[Dict[str, str]], pii_fields: List[str]
) -> List[Dict[str, str]]:
+ """
+ A function to obfuscate PII fields in a list of dictionaries, replacing
+ each sensitive value with the fixed placeholder string "***".
+ """
+ # If no data is provided, log a message and return an empty list
if not data:
logger.info("No valid data was provided to obfuscate")
return []
+ # Obfuscate the PII fields in each record using a list/dict comprehension.
+ # The nested comprehension is compact but harder to step through when
+ # debugging; it may be broken down into an explicit for loop later.
return [
{k: ("***" if k in pii_fields else v) for k, v in record.items()}
for record in data
git.ajschof.me — hosted by ajschofield — powered by cgit