From 501144d78220f14f74652ef95c88f0ce632c1fa0 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Sun, 2 Mar 2025 18:41:11 +0000 Subject: add docstrings to Obfuscator methods in __init__.py --- gdpr_obfuscator/__init__.py | 46 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'gdpr_obfuscator/__init__.py') diff --git a/gdpr_obfuscator/__init__.py b/gdpr_obfuscator/__init__.py index 7f118d0..f5440a6 100644 --- a/gdpr_obfuscator/__init__.py +++ b/gdpr_obfuscator/__init__.py @@ -4,16 +4,62 @@ from .utils import Utilities class Obfuscator: + """ + Provides the functionality to obfuscate the sensitive PII data in CSV files. + + It integrates the file handler to load the CSV data and the obfuscation logic to + replace the PII fields with obfuscated values. The obfuscated data is then returned + as a byte stream. + + The input is expected to be a JSON string containing the file path and the PII fields, + and the user should utilise either the `process_s3` or `process_local` methods to + obfuscate the data, depending on the file location. + """ + def __init__(self): + """ + Initialise the Obfuscator with a FileHandler and Utilities instance. + """ self.reader = FileHandler() self.utils = Utilities() def process_s3(self, input: str) -> bytes: + """ + Process a CSV file stored in an S3 bucket and obfuscate the PII fields. + + The method expects a JSON string input that contains: + - "file_path": an S3 URI (e.g., "s3://bucket/key") + - "pii_fields": a list of column names that contain PII + + Args: + input (str): A JSON string containing the S3 URI and PII fields, respectively + + Returns: + bytes: The obfuscated CSV data as a byte stream + """ + path, pii_fields = self.utils.process_json_input(input) obfuscated_data = obfuscate_data(self.reader.read_s3(path), pii_fields) return self.utils.create_byte_stream(obfuscated_data) def process_local(self, input: str) -> bytes: + """ + Process a CSV file stored locally and obfuscate the PII fields. + + The method expects a JSON string input that contains: + - "file_path": a local file path + - "pii_fields": a list of column names that contain PII + + NOTE: Since the scope of the project is to obfuscate data stored in S3, this method + has only been tested to work on MacOS and Linux systems. + + Args: + input (str): A JSON string containing the local file path and PII fields, respectively + + Returns: + bytes: The obfuscated CSV data as a byte stream + """ + path, pii_fields = self.utils.process_json_input(input) obfuscated_data = obfuscate_data(self.reader.read_local(path), pii_fields) return self.utils.create_byte_stream(obfuscated_data) -- cgit v1.2.3