aboutsummaryrefslogtreecommitdiffstats
path: root/gdpr_obfuscator/utils.py
diff options
context:
space:
mode:
author: Alex Schofield <git@ajschof.me> 2025-03-02 18:34:37 +0000
committer: Alex Schofield <git@ajschof.me> 2025-03-02 18:34:37 +0000
commit: fd8784fbb915b23746e6c8f7d0098cfefd833950 (patch)
tree: b68b2f02b3b33c0b1f9838fb555243e77643bcc0 /gdpr_obfuscator/utils.py
parent: 6083543b7a259fa7ac967eb3e0d787a1dda2d7f4 (diff)
download: gdpr-obfuscator-fd8784fbb915b23746e6c8f7d0098cfefd833950.tar.gz
gdpr-obfuscator-fd8784fbb915b23746e6c8f7d0098cfefd833950.zip
add docstrings to Utilities methods
Diffstat (limited to 'gdpr_obfuscator/utils.py')
-rw-r--r-- gdpr_obfuscator/utils.py 38
1 files changed, 38 insertions, 0 deletions
diff --git a/gdpr_obfuscator/utils.py b/gdpr_obfuscator/utils.py
index 1012358..e2a1231 100644
--- a/gdpr_obfuscator/utils.py
+++ b/gdpr_obfuscator/utils.py
@@ -7,6 +7,23 @@ import json
class Utilities:
@staticmethod
def process_json_input(json_input: str) -> Tuple[str, List[str]]:
+ """
+ Parse JSON input and return the file path and PII fields.
+
+ The JSON string is required to have:
+ - file_path: the path to the file to be processed
+ - pii_fields: a list of fields to be obfuscated
+
+ Args:
+ json_input (str): A JSON string containing the file path and PII fields
+
+ Raises:
+ ValueError: If "file_path" or "pii_fields" is missing or empty in the JSON input
+
+ Returns:
+ Tuple[str, List[str]]: A tuple containing the file path and PII fields, respectively
+ """
+
data = json.loads(json_input)
if not data.get("file_path") or not data.get("pii_fields"):
@@ -18,6 +35,16 @@ class Utilities:
@staticmethod
def get_s3_path(uri) -> Tuple[str, str]:
+ """
+ Extract the S3 bucket name and key from a given S3 URI.
+
+ Args:
+ uri (str): The S3 URI to extract the bucket and key from
+
+ Returns:
+ Tuple[str, str]: A tuple containing the bucket name and the key, respectively
+ """
+
parts = uri.replace("s3://", "").split("/")
bucket = parts.pop(0)
key = "/".join(parts)
@@ -25,6 +52,17 @@ class Utilities:
@staticmethod
def create_byte_stream(data: List[Dict[str, str]]) -> bytes:
+ """
+ Convert a list of dictionaries (representing CSV rows) into a CSV byte stream.
+
+ If the input data is empty, an empty byte stream is returned.
+
+ Args:
+ data (List[Dict[str, str]]): A list of dictionaries representing CSV rows
+
+ Returns:
+ bytes: A byte stream representing the CSV data
+ """
if not data:
return b""
git.ajschof.me — hosted by ajschofield — powered by cgit