aboutsummaryrefslogtreecommitdiffstats
path: root/gdpr_obfuscator/__init__.py
diff options
context:
space:
mode:
authorAlex Schofield <git@ajschof.me>2025-03-02 18:41:11 +0000
committerAlex Schofield <git@ajschof.me>2025-03-02 18:41:11 +0000
commit501144d78220f14f74652ef95c88f0ce632c1fa0 (patch)
treeccb2e04b2278e39b04d59f0d2831bc8cc0a935fb /gdpr_obfuscator/__init__.py
parentfd8784fbb915b23746e6c8f7d0098cfefd833950 (diff)
downloadgdpr-obfuscator-501144d78220f14f74652ef95c88f0ce632c1fa0.tar.gz
gdpr-obfuscator-501144d78220f14f74652ef95c88f0ce632c1fa0.zip
add docstrings to Obfuscator methods in __init__.py
Diffstat (limited to 'gdpr_obfuscator/__init__.py')
-rw-r--r--gdpr_obfuscator/__init__.py46
1 files changed, 46 insertions, 0 deletions
diff --git a/gdpr_obfuscator/__init__.py b/gdpr_obfuscator/__init__.py
index 7f118d0..f5440a6 100644
--- a/gdpr_obfuscator/__init__.py
+++ b/gdpr_obfuscator/__init__.py
@@ -4,16 +4,62 @@ from .utils import Utilities
class Obfuscator:
+ """
+ Provides the functionality to obfuscate the sensitive PII data in CSV files.
+
+ It integrates the file handler to load the CSV data and the obfuscation logic to
+ replace the PII fields with obfuscated values. The obfuscated data is then returned
+ as a byte stream.
+
+ The input is expected to be a JSON string containing the file path and the PII fields.
+ Call either the `process_s3` or the `process_local` method to obfuscate the data,
+ depending on where the file is stored.
+ """
+
def __init__(self):
+ """
+ Initialise the Obfuscator with a FileHandler and a Utilities instance.
+ """
self.reader = FileHandler()
self.utils = Utilities()
def process_s3(self, input: str) -> bytes:
+ """
+ Process a CSV file stored in an S3 bucket and obfuscate the PII fields.
+
+ The method expects a JSON string input that contains:
+ - "file_path": an S3 URI (e.g., "s3://bucket/key")
+ - "pii_fields": a list of column names that contain PII
+
+ Args:
+ input (str): A JSON string containing the S3 URI and the PII fields
+
+ Returns:
+ bytes: The obfuscated CSV data as a byte stream
+ """
+
path, pii_fields = self.utils.process_json_input(input)
obfuscated_data = obfuscate_data(self.reader.read_s3(path), pii_fields)
return self.utils.create_byte_stream(obfuscated_data)
def process_local(self, input: str) -> bytes:
+ """
+ Process a CSV file stored locally and obfuscate the PII fields.
+
+ The method expects a JSON string input that contains:
+ - "file_path": a local file path
+ - "pii_fields": a list of column names that contain PII
+
+ NOTE: Because the project's scope is obfuscating data stored in S3, this method
+ has only been tested on macOS and Linux systems.
+
+ Args:
+ input (str): A JSON string containing the local file path and the PII fields
+
+ Returns:
+ bytes: The obfuscated CSV data as a byte stream
+ """
+
path, pii_fields = self.utils.process_json_input(input)
obfuscated_data = obfuscate_data(self.reader.read_local(path), pii_fields)
return self.utils.create_byte_stream(obfuscated_data)
git.ajschof.me — hosted by ajschofield — powered by cgit