diff options
| author | Alex Schofield <git@ajschof.me> | 2025-03-02 18:41:11 +0000 |
|---|---|---|
| committer | Alex Schofield <git@ajschof.me> | 2025-03-02 18:41:11 +0000 |
| commit | 501144d78220f14f74652ef95c88f0ce632c1fa0 (patch) | |
| tree | ccb2e04b2278e39b04d59f0d2831bc8cc0a935fb | |
| parent | fd8784fbb915b23746e6c8f7d0098cfefd833950 (diff) | |
| download | gdpr-obfuscator-501144d78220f14f74652ef95c88f0ce632c1fa0.tar.gz gdpr-obfuscator-501144d78220f14f74652ef95c88f0ce632c1fa0.zip | |
add docstrings to Obfuscator methods in __init__.py
| -rw-r--r-- | gdpr_obfuscator/__init__.py | 46 |
1 files changed, 46 insertions, 0 deletions
diff --git a/gdpr_obfuscator/__init__.py b/gdpr_obfuscator/__init__.py index 7f118d0..f5440a6 100644 --- a/gdpr_obfuscator/__init__.py +++ b/gdpr_obfuscator/__init__.py @@ -4,16 +4,62 @@ from .utils import Utilities class Obfuscator: + """ + Provides the functionality to obfuscate the sensitive PII data in CSV files. + + It integrates the file handler to load the CSV data and the obfuscation logic to + replace the PII fields with obfuscated values. The obfuscated data is then returned + as a byte stream. + + The input is expected to be a JSON string containing the file path and the PII fields, + and the user should utilise either the `process_s3` or `process_local` methods to + obfuscate the data, depending on the file location. + """ + def __init__(self): + """ + Initialise the Obfuscator with a FileHandler and Utilities instance. + """ self.reader = FileHandler() self.utils = Utilities() def process_s3(self, input: str) -> bytes: + """ + Process a CSV file stored in an S3 bucket and obfuscate the PII fields. + + The method expects a JSON string input that contains: + - "file_path": an S3 URI (e.g., "s3://bucket/key") + - "pii_fields": a list of column names that contain PII + + Args: + input (str): A JSON string containing the S3 URI and PII fields, respectively + + Returns: + bytes: The obfuscated CSV data as a byte stream + """ + path, pii_fields = self.utils.process_json_input(input) obfuscated_data = obfuscate_data(self.reader.read_s3(path), pii_fields) return self.utils.create_byte_stream(obfuscated_data) def process_local(self, input: str) -> bytes: + """ + Process a CSV file stored locally and obfuscate the PII fields. + + The method expects a JSON string input that contains: + - "file_path": a local file path + - "pii_fields": a list of column names that contain PII + + NOTE: Since the scope of the project is to obfuscate data stored in S3, this method + has only been tested to work on MacOS and Linux systems. + + Args: + input (str): A JSON string containing the local file path and PII fields, respectively + + Returns: + bytes: The obfuscated CSV data as a byte stream + """ + path, pii_fields = self.utils.process_json_input(input) obfuscated_data = obfuscate_data(self.reader.read_local(path), pii_fields) return self.utils.create_byte_stream(obfuscated_data) |
