diff options
| -rw-r--r-- | cli.py | 2 |
| -rw-r--r-- | obfuscator/csv_reader.py | 9 |
| -rw-r--r-- | obfuscator/obfuscate.py | 1 |
| -rw-r--r-- | test/test_csv_reader.py | 11 |
| -rw-r--r-- | test/test_obfuscator.py | 13 |
5 files changed, 32 insertions, 4 deletions
@@ -7,6 +7,7 @@ from obfuscator.logger import get_logger # Create the logger logger = get_logger("CLI") + def main(): # Create an argument parser parser = argparse.ArgumentParser(description="gdpr-obfuscator") @@ -41,6 +42,7 @@ def main(): # For debug purposes, log the obfuscated data as JSON for readability logger.debug(json.dumps(obfuscated_data, indent=4)) + # If the script is run directly (as it should be), call the main function if __name__ == "__main__": main() diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py index 23a34fc..eb93609 100644 --- a/obfuscator/csv_reader.py +++ b/obfuscator/csv_reader.py @@ -9,13 +9,16 @@ logger = get_logger("CSVReader") # Putting the CSV reading components into a class may seem like overkill # for a simple script, but it allows for better organization and scalability. # @staticmethod is used to define the method without an instance of the class -# being required. The methods could be defined just as functions, and this +# being required. The methods could be defined just as functions, and this # may still be changed. + + class CSVReader: """ A class to read CSV data from a local file, S3 object, or string. Near the project completion, support for JSON/Parquet files will be added. 
""" + @staticmethod def read_local(path) -> List[Dict[str, str]]: """ @@ -24,7 +27,7 @@ class CSVReader: """ # Log the path of the file being read for debugging logger.debug(f"Reading local CSV from: {path}") - + # Attempt to read the file and return the data as a list of dictionaries # However, if the file isn't found or there is a generic exception, log # the error and raise an exception @@ -34,7 +37,7 @@ class CSVReader: return [dict(row) for row in reader] except FileNotFoundError: logger.error(f"File not found: {path}") - raise + raise except Exception as e: logger.error(f"Error reading file: {e}") diff --git a/obfuscator/obfuscate.py b/obfuscator/obfuscate.py index 3da9155..3f589cb 100644 --- a/obfuscator/obfuscate.py +++ b/obfuscator/obfuscate.py @@ -4,6 +4,7 @@ from obfuscator.logger import get_logger # Create the logger logger = get_logger("Obfuscator") + def obfuscate( data: List[Dict[str, str]], pii_fields: List[str] ) -> List[Dict[str, str]]: diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 1b3d071..af13cff 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -7,22 +7,30 @@ reader = CSVReader() # Check if the function can read a CSV string with no content and return # an empty list + + def test_empty_csv_should_return_no_content(): content = "" result = reader.read_string(content) expected = [] assert result == expected + # Check if the function can read a CSV string with only a header and return # an empty list + + def test_csv_with_header_only_should_return_no_content(): content = "student_id,name,course\n" result = reader.read_string(content) expected = [] assert result == expected + # Check if the function can read a CSV string with valid data and return # a list of dictionaries + + def test_csv_with_valid_data(): content = ( "student_id,name,course\n" @@ -36,8 +44,11 @@ def test_csv_with_valid_data(): ] assert result == expected + # Check if the function can read a CSV string with quoted fields and return # a 
list of dictionaries with the quoted fields intact + + def test_csv_with_quoted_fields_should_run_as_expected(): content = ( "student_id,name,course\n" diff --git a/test/test_obfuscator.py b/test/test_obfuscator.py index cc7d2c1..4f61b16 100644 --- a/test/test_obfuscator.py +++ b/test/test_obfuscator.py @@ -1,7 +1,9 @@ from obfuscator.obfuscate import obfuscate -# Check if the function does what its supposed to and can obfuscate +# Check if the function does what its supposed to and can obfuscate # valid PII fields in a list of dictionaries + + def test_obfuscate_data_with_valid_pii_fields(): data = [ { @@ -36,9 +38,12 @@ def test_obfuscate_data_with_valid_pii_fields(): result = obfuscate(data, pii_fields) assert result == expected + # Check if the function can obfuscate data even when some PII # fields are missing from some of the data, returning a list of dictionaries # but with the missing PII fields obfuscated and the rest of the data intact + + def test_obfuscate_data_with_missing_pii_field(): data = [ {"student_id": "1234", "name": "John Smith", "course": "Software"}, @@ -63,7 +68,10 @@ def test_obfuscate_data_with_missing_pii_field(): result = obfuscate(data, pii_fields) assert result == expected + # Check if the function can handle an empty list of data, returning an empty list + + def test_obfuscate_data_with_no_data(): data = [] pii_fields = ["name", "email_address"] @@ -72,8 +80,11 @@ def test_obfuscate_data_with_no_data(): result = obfuscate(data, pii_fields) assert result == expected + # Check if the function can handle an empty list of PII fields, returning the data as is # without mutating it + + def test_obfuscate_data_with_empty_pii_fields(): data = [ { |
