diff options
| author | Alex <git@ajschof.me> | 2025-02-17 14:01:24 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-02-17 14:01:24 +0000 |
| commit | bf323b8c2ebd47bb446ba773027f389a0887e325 (patch) | |
| tree | a99037c07a2b2bce642a0ce4648ac858ac0e325c | |
| parent | 73462d62d48cd3cf061697f9f6a390437ee29f2d (diff) | |
| parent | e796c7bb6cc6de6368c2d195e233d0b11cf7e699 (diff) | |
| download | gdpr-obfuscator-bf323b8c2ebd47bb446ba773027f389a0887e325.tar.gz gdpr-obfuscator-bf323b8c2ebd47bb446ba773027f389a0887e325.zip | |
Merge pull request #2 from ajschofield/feat/obfuscator
merge obfuscator into stable
| -rw-r--r-- | cli.py | 10 | ||||
| -rw-r--r-- | obfuscator/obfuscate.py | 17 | ||||
| -rw-r--r-- | test/test_obfuscator.py | 86 |
3 files changed, 112 insertions, 1 deletion
@@ -1,5 +1,7 @@
 import argparse
+import json
 from obfuscator.csv_reader import CSVReader
+from obfuscator.obfuscate import obfuscate
 from obfuscator.logger import get_logger
 
 logger = get_logger("CLI")
@@ -11,16 +13,22 @@ def main():
     loc = parser.add_mutually_exclusive_group(required=True)
     loc.add_argument("--local")
     loc.add_argument("--s3")
+
+    parser.add_argument("--pii", nargs="+", required=True)
+
     args = parser.parse_args()
 
     if args.local and not args.s3:
         logger.debug("User chose to read CSV from local path")
         reader = CSVReader()
         data = reader.read_local(args.local)
-        print(data)
+        logger.debug(data)
     else:
         logger.debug("User chose to read CSV from S3")
 
+    obfuscated_data = obfuscate(data, args.pii)
+    logger.debug(json.dumps(obfuscated_data, indent=4))
+
 
 if __name__ == "__main__":
     main()
diff --git a/obfuscator/obfuscate.py b/obfuscator/obfuscate.py
new file mode 100644
index 0000000..ac0bd21
--- /dev/null
+++ b/obfuscator/obfuscate.py
@@ -0,0 +1,17 @@
+from typing import List, Dict
+from obfuscator.logger import get_logger
+
+logger = get_logger("Obfuscator")
+
+
+def obfuscate(
+    data: List[Dict[str, str]], pii_fields: List[str]
+) -> List[Dict[str, str]]:
+    if not data:
+        logger.info("No valid data was provided to obfuscate")
+        return []
+
+    return [
+        {k: ("***" if k in pii_fields else v) for k, v in record.items()}
+        for record in data
+    ]
diff --git a/test/test_obfuscator.py b/test/test_obfuscator.py
new file mode 100644
index 0000000..c77b6b4
--- /dev/null
+++ b/test/test_obfuscator.py
@@ -0,0 +1,86 @@
+from obfuscator.obfuscate import obfuscate
+
+
+def test_obfuscate_data_with_valid_pii_fields():
+    data = [
+        {
+            "student_id": "1234",
+            "name": "John Smith",
+            "course": "Software",
+            "email_address": "j.smith@email.com",
+        },
+        {
+            "student_id": "5678",
+            "name": "Jane Doe",
+            "course": "Data Science",
+            "email_address": "j.doe@email.com",
+        },
+    ]
+    pii_fields = ["name", "email_address"]
+    expected = [
+        {
+            "student_id": "1234",
+            "name": "***",
+            "course": "Software",
+            "email_address": "***",
+        },
+        {
+            "student_id": "5678",
+            "name": "***",
+            "course": "Data Science",
+            "email_address": "***",
+        },
+    ]
+
+    result = obfuscate(data, pii_fields)
+    assert result == expected
+
+
+def test_obfuscate_data_with_missing_pii_field():
+    data = [
+        {"student_id": "1234", "name": "John Smith", "course": "Software"},
+        {
+            "student_id": "5678",
+            "name": "Jane Doe",
+            "course": "Data Science",
+            "email_address": "j.doe@email.com",
+        },
+    ]
+    pii_fields = ["name", "email_address"]
+    expected = [
+        {"student_id": "1234", "name": "***", "course": "Software"},
+        {
+            "student_id": "5678",
+            "name": "***",
+            "course": "Data Science",
+            "email_address": "***",
+        },
+    ]
+
+    result = obfuscate(data, pii_fields)
+    assert result == expected
+
+
+def test_obfuscate_data_with_no_data():
+    data = []
+    pii_fields = ["name", "email_address"]
+    expected = []
+
+    result = obfuscate(data, pii_fields)
+    assert result == expected
+
+
+def test_obfuscate_data_with_empty_pii_fields():
+    data = [
+        {
+            "student_id": "1234",
+            "name": "John Smith",
+            "course": "Software",
+            "email_address": "j.smith@email.com",
+        }
+    ]
+    pii_fields = []
+    expected = data.copy()
+
+    result = obfuscate(data, pii_fields)
+    assert result == expected
