From fd8784fbb915b23746e6c8f7d0098cfefd833950 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Sun, 2 Mar 2025 18:34:37 +0000 Subject: add docstrings to Utilities methods --- gdpr_obfuscator/utils.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'gdpr_obfuscator') diff --git a/gdpr_obfuscator/utils.py b/gdpr_obfuscator/utils.py index 1012358..e2a1231 100644 --- a/gdpr_obfuscator/utils.py +++ b/gdpr_obfuscator/utils.py @@ -7,6 +7,23 @@ import json class Utilities: @staticmethod def process_json_input(json_input: str) -> Tuple[str, List[str]]: + """ + Parse JSON input and return the file path and PII fields. + + The JSON string is required to have: + - file_path: the path to the file to be processed + - pii_fields: a list of fields to be obfuscated + + Args: + json_input (str): A JSON string containing the file path and PII fields + + Raises: + ValueError: If the JSON input is missing "file_path" or "pii_fields" + + Returns: + Tuple[str, List[str]]: A tuple containing the file path and PII fields, respectively + """ + data = json.loads(json_input) if not data.get("file_path") or not data.get("pii_fields"): @@ -18,6 +35,16 @@ class Utilities: @staticmethod def get_s3_path(uri) -> Tuple[str, str]: + """ + Extract the S3 bucket name and key from a given S3 URI. + + Args: + uri (str): The S3 URI to extract the bucket and key from + + Returns: + Tuple[str, str]: A tuple containing the bucket name and the key, respectively + """ + parts = uri.replace("s3://", "").split("/") bucket = parts.pop(0) key = "/".join(parts) @@ -25,6 +52,17 @@ class Utilities: @staticmethod def create_byte_stream(data: List[Dict[str, str]]) -> bytes: + """ + Convert a list of dictionaries (representing CSV rows) into a CSV byte stream. + + If the input data is empty, an empty byte stream is returned. 
+ + Args: + data (List[Dict[str, str]]): A list of dictionaries representing CSV rows + + Returns: + bytes: A byte stream representing the CSV data + """ if not data: return b"" -- cgit v1.2.3