1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
import argparse
import json
from obfuscator.csv_reader import CSVReader
from obfuscator.obfuscate import obfuscate
from obfuscator.logger import get_logger
from obfuscator.csv_writer import create_byte_stream
# Module-level logger for this command-line entry point, obtained from the
# project's get_logger helper under the name "CLI".
logger = get_logger("CLI")
def main(argv=None):
    """Run the GDPR-Obfuscator command-line interface.

    Parses the command-line options, reads CSV data from either a local path
    or an S3 object, obfuscates the requested PII fields and returns the
    result of ``create_byte_stream`` for the obfuscated data.

    Args:
        argv: Optional list of argument strings to parse. When ``None`` (the
            default) argparse falls back to ``sys.argv[1:]``, so existing
            ``main()`` callers behave exactly as before.

    Returns:
        Whatever ``create_byte_stream`` returns for the obfuscated data.

    Raises:
        SystemExit: Raised by argparse when a required option is missing,
            when both ``--local`` and ``--s3`` are supplied, or on ``--help``.
    """
    parser = argparse.ArgumentParser(
        prog="GDPR-Obfuscator",
        description="Obfuscate sensitive data stored locally or in an AWS environment",
    )

    # NOTE(review): args.verbose is parsed but never read in this function —
    # confirm where (or whether) verbosity is actually applied.
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Enable verbose logging"
    )

    # Exactly one input source must be given: argparse exits if neither or
    # both of --local / --s3 are supplied.
    loc = parser.add_mutually_exclusive_group(required=True)
    loc.add_argument("-l", "--local", help="Path to local CSV file")
    loc.add_argument(
        "-s", "--s3", help="S3 object path (example: s3://bucket-name/file)"
    )

    # One or more PII field names are mandatory,
    # e.g. --pii name email_address
    parser.add_argument(
        "-p",
        "--pii",
        nargs="+",
        required=True,
        help="List of PII fields to obfuscate, separated by spaces",
    )

    args = parser.parse_args(argv)

    reader = CSVReader()

    # The group above guarantees exactly one of local/s3 was supplied, so test
    # for presence rather than truthiness: an empty --local value must still
    # be treated as a local path instead of falling through to read_s3(None).
    if args.local is not None:
        logger.debug("User chose to read CSV from local path")
        data = reader.read_local(args.local)
    else:
        logger.debug("User chose to read CSV from S3")
        data = reader.read_s3(args.s3)

    # NOTE(review): this writes the un-obfuscated input to the debug log —
    # confirm that is acceptable for data containing PII.
    logger.debug("Contents: " + str(data))

    # Obfuscate the fields the user listed via --pii.
    obfuscated_data = obfuscate(data, args.pii)

    # Logged as indented JSON purely for readability while debugging.
    logger.debug("Obfuscated data (JSON): " + json.dumps(obfuscated_data, indent=4))

    return create_byte_stream(obfuscated_data)
# Run the CLI only when this file is executed directly, not when imported.
# NOTE(review): main() returns the result of create_byte_stream(), but that
# value is discarded here — confirm whether it should be written somewhere.
if __name__ == "__main__":
    main()
|