aboutsummaryrefslogtreecommitdiffstats
path: root/cli.py
blob: f2c8771a2edade7c11e4b46be5b78deaa25e12b2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import argparse
import json
from obfuscator.csv_reader import CSVReader
from obfuscator.obfuscate import obfuscate
from obfuscator.logger import get_logger
from obfuscator.csv_writer import create_byte_stream

# Create the module-level logger, named "CLI", via the project's get_logger
# helper; main() below writes its debug messages through it.
logger = get_logger("CLI")


def main(argv=None):
    """Run the gdpr-obfuscator command-line interface.

    Parses the command line, reads CSV data from either a local path or an
    S3 location, passes the data and the chosen PII field names to
    ``obfuscate`` and returns whatever ``create_byte_stream`` produces for
    the obfuscated data.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, exactly as a plain ``main()`` call does.

    Returns:
        The result of ``create_byte_stream(obfuscated_data)``.

    Raises:
        SystemExit: Raised by argparse (exit status 2) when neither or both
            of ``--local`` / ``--s3`` are given, or when ``--pii`` is missing.
    """
    parser = argparse.ArgumentParser(description="gdpr-obfuscator")

    # Exactly one input location is required: a local file or an S3 object.
    # argparse exits with a usage error if none or both are supplied.
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("--local")
    source.add_argument("--s3")

    # One or more PII field names to obfuscate,
    # e.g. --pii name email_address
    parser.add_argument("--pii", nargs="+", required=True)

    args = parser.parse_args(argv)

    reader = CSVReader()

    # argparse leaves the option that was not chosen as None, so compare
    # against None instead of relying on truthiness: an empty --local value
    # must not be silently routed to read_s3(None).
    if args.local is not None:
        logger.debug("User chose to read CSV from local path")
        data = reader.read_local(args.local)
    else:
        logger.debug("User chose to read CSV from S3")
        data = reader.read_s3(args.s3)

    # For debug purposes, log the data read from the CSV.
    # NOTE(review): this is the data *before* obfuscation, so it presumably
    # still contains the PII values - confirm debug logging is acceptable
    # wherever this runs.
    logger.debug("Contents: " + str(data))

    # Obfuscate the data based on the user's choice of PII fields
    obfuscated_data = obfuscate(data, args.pii)
    # For debug purposes, log the obfuscated data as JSON for readability
    logger.debug("Obfuscated data (JSON): " + json.dumps(obfuscated_data, indent=4))
    return create_byte_stream(obfuscated_data)


# If the script is run directly (as it should be), call the main function.
# NOTE(review): main() returns the result of create_byte_stream(), which is
# discarded here - confirm whether it should be written to an output.
if __name__ == "__main__":
    main()
git.ajschof.me — hosted by ajschofield — powered by cgit