about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- cli.py 10
-rw-r--r-- obfuscator/obfuscate.py 17
-rw-r--r-- test/test_obfuscator.py 86
3 files changed, 112 insertions, 1 deletions
diff --git a/cli.py b/cli.py
index 19e64d8..c6442c7 100644
--- a/cli.py
+++ b/cli.py
@@ -1,5 +1,7 @@
import argparse
+import json
from obfuscator.csv_reader import CSVReader
+from obfuscator.obfuscate import obfuscate
from obfuscator.logger import get_logger
logger = get_logger("CLI")
@@ -11,16 +13,22 @@ def main():
loc = parser.add_mutually_exclusive_group(required=True)
loc.add_argument("--local")
loc.add_argument("--s3")
+
+ parser.add_argument("--pii", nargs="+", required=True)
+
args = parser.parse_args()
if args.local and not args.s3:
logger.debug("User chose to read CSV from local path")
reader = CSVReader()
data = reader.read_local(args.local)
- print(data)
+ logger.debug(data)
else:
logger.debug("User chose to read CSV from S3")
+ obfuscated_data = obfuscate(data, args.pii)
+ logger.debug(json.dumps(obfuscated_data, indent=4))
+
if __name__ == "__main__":
main()
diff --git a/obfuscator/obfuscate.py b/obfuscator/obfuscate.py
new file mode 100644
index 0000000..ac0bd21
--- /dev/null
+++ b/obfuscator/obfuscate.py
@@ -0,0 +1,17 @@
+from typing import List, Dict
+from obfuscator.logger import get_logger
+
+logger = get_logger("Obfuscator")
+
+
+def obfuscate(
+    data: List[Dict[str, str]], pii_fields: List[str]
+) -> List[Dict[str, str]]:
+    """Return copies of the records in ``data`` with PII values masked.
+
+    Args:
+        data: Records to process, one dict per record mapping field name
+            to value. The input records are not modified.
+        pii_fields: Names of the fields whose values are replaced with
+            ``"***"``. A single string is treated as one field name and
+            ``None`` as "no fields". Names absent from a record are
+            ignored for that record.
+
+    Returns:
+        A new list of new dicts in the same order as ``data``; an empty
+        list when ``data`` is empty or ``None``.
+    """
+    if not data:
+        logger.info("No valid data was provided to obfuscate")
+        return []
+
+    # A bare string would make ``in`` perform substring matching, so wrap
+    # it first; building the set once makes every membership test O(1)
+    # instead of re-scanning the list for each key of each record.
+    if isinstance(pii_fields, str):
+        pii_fields = [pii_fields]
+    masked_fields = set(pii_fields or ())
+
+    return [
+        {k: ("***" if k in masked_fields else v) for k, v in record.items()}
+        for record in data
+    ]
diff --git a/test/test_obfuscator.py b/test/test_obfuscator.py
new file mode 100644
index 0000000..c77b6b4
--- /dev/null
+++ b/test/test_obfuscator.py
@@ -0,0 +1,86 @@
+from obfuscator.obfuscate import obfuscate
+
+
+def test_obfuscate_data_with_valid_pii_fields():
+    """Listed PII fields are masked in every record; other fields are kept."""
+    john = {
+        "student_id": "1234",
+        "name": "John Smith",
+        "course": "Software",
+        "email_address": "j.smith@email.com",
+    }
+    jane = {
+        "student_id": "5678",
+        "name": "Jane Doe",
+        "course": "Data Science",
+        "email_address": "j.doe@email.com",
+    }
+
+    masked = obfuscate([john, jane], ["name", "email_address"])
+
+    assert masked == [
+        {
+            "student_id": "1234",
+            "name": "***",
+            "course": "Software",
+            "email_address": "***",
+        },
+        {
+            "student_id": "5678",
+            "name": "***",
+            "course": "Data Science",
+            "email_address": "***",
+        },
+    ]
+
+
+def test_obfuscate_data_with_missing_pii_field():
+    """A PII field absent from a record is simply skipped for that record."""
+    no_email = {
+        "student_id": "1234",
+        "name": "John Smith",
+        "course": "Software",
+    }
+    with_email = {
+        "student_id": "5678",
+        "name": "Jane Doe",
+        "course": "Data Science",
+        "email_address": "j.doe@email.com",
+    }
+
+    masked = obfuscate([no_email, with_email], ["name", "email_address"])
+
+    assert masked == [
+        {"student_id": "1234", "name": "***", "course": "Software"},
+        {
+            "student_id": "5678",
+            "name": "***",
+            "course": "Data Science",
+            "email_address": "***",
+        },
+    ]
+
+
+def test_obfuscate_data_with_no_data():
+    """An empty list of records yields an empty result."""
+    assert obfuscate([], ["name", "email_address"]) == []
+
+
+def test_obfuscate_data_with_empty_pii_fields():
+    """With no PII fields requested, the records come back unchanged."""
+    records = [
+        {
+            "student_id": "1234",
+            "name": "John Smith",
+            "course": "Software",
+            "email_address": "j.smith@email.com",
+        }
+    ]
+    # Snapshot taken before the call, as a shallow copy of the list.
+    untouched = list(records)
+
+    assert obfuscate(records, []) == untouched
git.ajschof.me — hosted by ajschofield — powered by cgit