diff options
| author | Alex Schofield <git@ajschof.me> | 2025-03-02 18:50:45 +0000 |
|---|---|---|
| committer | Alex Schofield <git@ajschof.me> | 2025-03-02 18:50:45 +0000 |
| commit | a9d812a4e430ef305a9a36eda0e651d415be62c6 (patch) | |
| tree | e9ceaf656a8da50b1a6176f8dc7ff2afddd2fc43 /gdpr_obfuscator | |
| parent | 501144d78220f14f74652ef95c88f0ce632c1fa0 (diff) | |
| download | gdpr-obfuscator-a9d812a4e430ef305a9a36eda0e651d415be62c6.tar.gz gdpr-obfuscator-a9d812a4e430ef305a9a36eda0e651d415be62c6.zip | |
add docstrings to FileHandler methods in read.py
Diffstat (limited to 'gdpr_obfuscator')
| -rw-r--r-- | gdpr_obfuscator/read.py | 50 |
1 files changed, 42 insertions, 8 deletions
diff --git a/gdpr_obfuscator/read.py b/gdpr_obfuscator/read.py index dd8d48c..1d80718 100644 --- a/gdpr_obfuscator/read.py +++ b/gdpr_obfuscator/read.py @@ -7,17 +7,34 @@ from .utils import Utilities class FileHandler: """ - A class to read CSV data from a local file, S3 object, or string. Near - the project completion, support for JSON/Parquet files will be added. + A class to read CSV data from a local file, S3 object, or string. Currently, + CSV files are supported but support for JSON and Parquet files may be + added in the future. """ def __init__(self): + """ + Initialise the FileHandler with a Utilities instance. + """ self.utils = Utilities() def read_local(self, file_path) -> List[Dict[str, str]]: """ - A method to read a local CSV file and return the data as a list of - dictionaries. + Read a local CSV file and return the data as a list of dictionaries. + + The file path should be a local path to the CSV file. There is no logic + to convert file paths between operating systems since `read_s3` is the + main method to be used. Therefore, this method will only work reliably + on macOS and Linux systems. + + This method uses the built-in `open` function to read the CSV file and + then reads the CSV data using `read_string` to be returned. + + Args: + file_path (str): The local file path to the CSV file + + Returns: + List[Dict[str, str]]: A list of dictionaries representing the CSV data rows """ with open(file_path, mode="r", encoding="utf-8") as f: @@ -25,8 +42,18 @@ class FileHandler: def read_s3(self, file_path) -> List[Dict[str, str]]: """ - A method to read an S3 object containing CSV data - and return the data as a list of dictionaries. + Read a CSV file within an S3 bucket and return the data as a list of dictionaries. + + The S3 URI should be in the format "s3://bucket/key". This method uses + get_object present in the boto3 library to interact with S3 and retrieve + the CSV file. 
Once retrieved, the CSV data is read using `read_string` + and is returned. + + Args: + file_path (str): The S3 URI of the CSV file, in the format "s3://bucket/key" + + Returns: + List[Dict[str, str]]: A list of dictionaries representing the CSV data rows """ bucket, key = self.utils.get_s3_path(file_path) @@ -40,8 +67,15 @@ class FileHandler: @staticmethod def read_string(content: str) -> List[Dict[str, str]]: """ - A method to read CSV data from a string and return the data as a list - of dictionaries. + Parse raw data provided by read helpers and return the data as a list of dictionaries. + + If the provided string is empty, an empty list is returned. + + Args: + content (str): The raw CSV data as a string + + Returns: + List[Dict[str, str]]: A list of dictionaries representing the CSV data rows """ if not content.strip(): return [] |
