aboutsummaryrefslogtreecommitdiffstats
path: root/gdpr_obfuscator/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'gdpr_obfuscator/utils.py')
-rw-r--r--gdpr_obfuscator/utils.py38
1 files changed, 38 insertions, 0 deletions
diff --git a/gdpr_obfuscator/utils.py b/gdpr_obfuscator/utils.py
index 1012358..e2a1231 100644
--- a/gdpr_obfuscator/utils.py
+++ b/gdpr_obfuscator/utils.py
@@ -7,6 +7,23 @@ import json
class Utilities:
@staticmethod
def process_json_input(json_input: str) -> Tuple[str, List[str]]:
+ """
+ Parse JSON input and return the file path and PII fields.
+
+ The JSON string is required to have:
+ - file_path: the path to the file to be processed
+ - pii_fields: a list of fields to be obfuscated
+
+ Args:
+ json_input (str): A JSON string containing the file path and PII fields
+
+ Raises:
+ ValueError: If the JSON input is missing "file_path" or "pii_fields"
+
+ Returns:
+ Tuple[str, List[str]]: A tuple containing the file path and PII fields, respectively
+ """
+
data = json.loads(json_input)
if not data.get("file_path") or not data.get("pii_fields"):
@@ -18,6 +35,16 @@ class Utilities:
@staticmethod
def get_s3_path(uri) -> Tuple[str, str]:
+ """
+ Extract the S3 bucket name and key from a given S3 URI.
+
+ Args:
+ uri (_type_): The S3 URI to extract the bucket and key from
+
+ Returns:
+ Tuple[str, str]: A tuple containing the bucket name and the key, respectively
+ """
+
parts = uri.replace("s3://", "").split("/")
bucket = parts.pop(0)
key = "/".join(parts)
@@ -25,6 +52,17 @@ class Utilities:
@staticmethod
def create_byte_stream(data: List[Dict[str, str]]) -> bytes:
+ """
+ Convert a list of dictionaries (representing CSV rows) into a CSV byte stream.
+
+ If the input data is empty, an empty byte stream is returned.
+
+ Args:
+ data (List[Dict[str, str]]): A list of dictionaries representing CSV rows
+
+ Returns:
+ bytes: A byte stream representing the CSV data
+ """
if not data:
return b""
git.ajschof.me — hosted by ajschofield — powered by cgit