about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- cli.py 16
-rw-r--r-- obfuscator/csv_reader.py 19
-rw-r--r-- obfuscator/csv_writer.py 1
-rw-r--r-- obfuscator/obfuscate.py 5
-rw-r--r-- obfuscator/utils.py 1
-rw-r--r-- test/test_csv_reader.py 23
-rw-r--r-- test/test_obfuscator.py 15
7 files changed, 0 insertions, 80 deletions
diff --git a/cli.py b/cli.py
index d19c18d..c03ffd1 100644
--- a/cli.py
+++ b/cli.py
@@ -7,7 +7,6 @@ from obfuscator.csv_writer import create_byte_stream
def main():
- # Create an argument parser
parser = argparse.ArgumentParser(
prog="GDPR-Obfuscator",
description="Obfuscate sensitive data stored locally or in an AWS environment",
@@ -17,16 +16,10 @@ def main():
"-v", "--verbose", action="store_true", help="Enable verbose logging"
)
- # Require user to either choose a local file or an S3 object
- # The user can only choose one of these options or the program will exit
- # If not provided, the program will exit
loc = parser.add_mutually_exclusive_group(required=True)
loc.add_argument("-l", "--local", help="Local path to file")
loc.add_argument("-s", "--s3", help="URI path to file stored in S3")
- # Require user to provide a list of PII fields to obfuscate
- # e.g. --pii name email_address
- # If not provided, the program will exit
parser.add_argument(
"-p",
"--pii",
@@ -35,23 +28,17 @@ def main():
help="List of PII fields to obfuscate, separated by spaces",
)
- # Parse the arguments
args = parser.parse_args()
- # If the user chose verbose logging, set the logger to debug
log_level = "DEBUG" if args.verbose else "INFO"
- # Create the logger
logger = get_logger("CLI", log_level)
- # Create the CSVReader object
reader = CSVReader(log_level)
- # Read the CSV data based on the user's choice of local or S3
if args.local and not args.s3:
logger.debug("User chose to read CSV from local path")
data = reader.read_local(args.local)
- # For debug purposes, log the data read from the CSV
logger.debug("Contents: " + str(data))
else:
logger.debug("User chose to read CSV from S3")
@@ -59,13 +46,10 @@ def main():
data = reader.read_s3(args.s3)
logger.debug("Contents: " + str(data))
- # Obfuscate the data based on the user's choice of PII fields
obfuscated_data = obfuscate(data, args.pii)
- # For debug purposes, log the obfuscated data as JSON for readability
logger.debug("Obfuscated data (JSON): " + json.dumps(obfuscated_data, indent=4))
return create_byte_stream(obfuscated_data)
-# If the script is run directly (as it should be), call the main function
if __name__ == "__main__":
main()
diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py
index f8dd7d3..2b099c8 100644
--- a/obfuscator/csv_reader.py
+++ b/obfuscator/csv_reader.py
@@ -6,12 +6,6 @@ from typing import List, Dict
from obfuscator.logger import get_logger
from obfuscator.utils import Utilities
-# Putting the CSV reading components into a class may seem like overkill
-# for a simple script, but it allows for better organization and scalability.
-# @staticmethod is used to define the method without an instance of the class
-# being required. The methods could be defined just as functions, and this
-# may still be changed.
-
class CSVReader:
"""
@@ -21,7 +15,6 @@ class CSVReader:
def __init__(self, log_level=None):
self.log_level = log_level
- # Create the logger
self.logger = get_logger("CSVREADER", log_level)
def read_local(self, path) -> List[Dict[str, str]]:
@@ -29,12 +22,8 @@ class CSVReader:
A method to read a local CSV file and return the data as a list of
dictionaries.
"""
- # Log the path of the file being read for debugging
self.logger.debug(f"Reading local CSV from: {path}")
- # Attempt to read the file and return the data as a list of dictionaries
- # However, if the file isn't found or there is a generic exception, log
- # the error and raise an exception
try:
with open(path, mode="r", encoding="utf-8") as f:
reader = csv.DictReader(f)
@@ -54,7 +43,6 @@ class CSVReader:
bucket, key = utils.get_s3_path(path)
self.logger.debug(f"Reading S3 CSV from: {bucket}/{key}")
- # If LOCALSTACK=TRUE, use the localstack endpoint for testing
if os.getenv("LOCALSTACK", "FALSE").upper() == "TRUE":
localstack_endpoint = "http://localhost.localstack.cloud:4566"
self.logger.debug("Using LocalStack endpoint for S3")
@@ -69,15 +57,10 @@ class CSVReader:
client = boto3.client("s3")
try:
- # Attempt to read the S3 object and return the data as a list of dictionaries
response = client.get_object(Bucket=bucket, Key=key)
self.logger.info("S3 object read successfully")
- # Read and decode the content
content = response["Body"].read().decode("utf-8")
- # Even though the read_string method was only created for testing,
- # it can be reused here to read and return the CSV data
return CSVReader.read_string(content)
- # TODO: Add more specific exceptions to catch
except Exception as e:
self.logger.error(f"Error reading S3 object: {e}")
raise
@@ -87,11 +70,9 @@ class CSVReader:
A method to read CSV data from a string and return the data as a list
of dictionaries.
"""
- # If the content is empty, return an empty list
if not content.strip():
return []
- # Treat the string as a file-like object and return as list of dictionaries
f = io.StringIO(content)
reader = csv.DictReader(f)
return [dict(row) for row in reader]
diff --git a/obfuscator/csv_writer.py b/obfuscator/csv_writer.py
index 099e910..56b3f1f 100644
--- a/obfuscator/csv_writer.py
+++ b/obfuscator/csv_writer.py
@@ -3,7 +3,6 @@ import io
from typing import List, Dict
from obfuscator.logger import get_logger
-# Create the logger
logger = get_logger("CSVWRITER")
diff --git a/obfuscator/obfuscate.py b/obfuscator/obfuscate.py
index 4f7e6c1..e964433 100644
--- a/obfuscator/obfuscate.py
+++ b/obfuscator/obfuscate.py
@@ -1,7 +1,6 @@
from typing import List, Dict
from obfuscator.logger import get_logger
-# Create the logger
logger = get_logger("OBFUSCATE")
@@ -12,14 +11,10 @@ def obfuscate(
A function to obfuscate PII fields in a list of dictionaries, replacing
sensitive values with a string of asterisks.
"""
- # If no data is provided, log a message and return an empty list
if not data:
logger.info("No valid data was provided to obfuscate")
return []
- # Obfuscate the PII fields in each record using a list/dict comprehension
- # This code is good but makes debugging a bit tricky. I may consider
- # breaking it down into a for loop.
return [
{k: ("***" if k in pii_fields else v) for k, v in record.items()}
for record in data
diff --git a/obfuscator/utils.py b/obfuscator/utils.py
index 81eb04a..f61451b 100644
--- a/obfuscator/utils.py
+++ b/obfuscator/utils.py
@@ -5,7 +5,6 @@ from obfuscator.logger import get_logger
class Utilities:
def __init__(self, logger=None):
- # Create the logger
self.logger = get_logger("UTILITIES", logger)
def get_s3_path(self, uri):
diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py
index e4c135b..0206542 100644
--- a/test/test_csv_reader.py
+++ b/test/test_csv_reader.py
@@ -1,6 +1,3 @@
-# csv_reader.py - tests for read_string and read_s3
-# Author: Alex Schofield
-
import boto3
from moto import mock_aws
from obfuscator.csv_reader import CSVReader
@@ -8,11 +5,6 @@ import pytest
reader = CSVReader()
-# CSVREADER: READ_STRING TESTS
-
-# Check if the function can read a CSV string with no content and return
-# an empty list
-
def test_empty_csv_should_return_no_content():
content = ""
@@ -21,10 +13,6 @@ def test_empty_csv_should_return_no_content():
assert result == expected
-# Check if the function can read a CSV string with only a header and return
-# an empty list
-
-
def test_csv_with_header_only_should_return_no_content():
content = "student_id,name,course\n"
result = reader.read_string(content)
@@ -32,10 +20,6 @@ def test_csv_with_header_only_should_return_no_content():
assert result == expected
-# Check if the function can read a CSV string with valid data and return
-# a list of dictionaries
-
-
def test_csv_with_valid_data():
content = (
"student_id,name,course\n"
@@ -50,10 +34,6 @@ def test_csv_with_valid_data():
assert result == expected
-# Check if the function can read a CSV string with quoted fields and return
-# a list of dictionaries with the quoted fields intact
-
-
def test_csv_with_quoted_fields_should_run_as_expected():
content = (
"student_id,name,course\n"
@@ -68,9 +48,6 @@ def test_csv_with_quoted_fields_should_run_as_expected():
assert result == expected
-# CSVREADER: READ_S3 TESTS
-
-
def setup_s3(s3_client, bucket: str, key: str, content: str):
s3_client.create_bucket(
Bucket=bucket,
diff --git a/test/test_obfuscator.py b/test/test_obfuscator.py
index 4f61b16..c77b6b4 100644
--- a/test/test_obfuscator.py
+++ b/test/test_obfuscator.py
@@ -1,8 +1,5 @@
from obfuscator.obfuscate import obfuscate
-# Check if the function does what its supposed to and can obfuscate
-# valid PII fields in a list of dictionaries
-
def test_obfuscate_data_with_valid_pii_fields():
data = [
@@ -39,11 +36,6 @@ def test_obfuscate_data_with_valid_pii_fields():
assert result == expected
-# Check if the function can obfuscate data even when some PII
-# fields are missing from some of the data, returning a list of dictionaries
-# but with the missing PII fields obfuscated and the rest of the data intact
-
-
def test_obfuscate_data_with_missing_pii_field():
data = [
{"student_id": "1234", "name": "John Smith", "course": "Software"},
@@ -69,9 +61,6 @@ def test_obfuscate_data_with_missing_pii_field():
assert result == expected
-# Check if the function can handle an empty list of data, returning an empty list
-
-
def test_obfuscate_data_with_no_data():
data = []
pii_fields = ["name", "email_address"]
@@ -81,10 +70,6 @@ def test_obfuscate_data_with_no_data():
assert result == expected
-# Check if the function can handle an empty list of PII fields, returning the data as is
-# without mutating it
-
-
def test_obfuscate_data_with_empty_pii_fields():
data = [
{
git.ajschof.me — hosted by ajschofield — powered by cgit