aboutsummaryrefslogtreecommitdiffstats
path: root/gdpr_obfuscator
diff options
context:
space:
mode:
authorAlex Schofield <git@ajschof.me>2025-03-02 18:50:45 +0000
committerAlex Schofield <git@ajschof.me>2025-03-02 18:50:45 +0000
commita9d812a4e430ef305a9a36eda0e651d415be62c6 (patch)
treee9ceaf656a8da50b1a6176f8dc7ff2afddd2fc43 /gdpr_obfuscator
parent501144d78220f14f74652ef95c88f0ce632c1fa0 (diff)
downloadgdpr-obfuscator-a9d812a4e430ef305a9a36eda0e651d415be62c6.tar.gz
gdpr-obfuscator-a9d812a4e430ef305a9a36eda0e651d415be62c6.zip
add docstrings to FileHandler methods in read.py
Diffstat (limited to 'gdpr_obfuscator')
-rw-r--r--gdpr_obfuscator/read.py50
1 files changed, 42 insertions, 8 deletions
diff --git a/gdpr_obfuscator/read.py b/gdpr_obfuscator/read.py
index dd8d48c..1d80718 100644
--- a/gdpr_obfuscator/read.py
+++ b/gdpr_obfuscator/read.py
@@ -7,17 +7,34 @@ from .utils import Utilities
class FileHandler:
"""
- A class to read CSV data from a local file, S3 object, or string. Near
- the project completion, support for JSON/Parquet files will be added.
+ A class to read CSV data from a local file, S3 object, or string. Currently,
+ CSV files are supported but support for JSON and Parquet files may be
+ added in the future.
"""
def __init__(self):
+ """
+ Initialise the FileHandler with a Utilities instance.
+ """
self.utils = Utilities()
def read_local(self, file_path) -> List[Dict[str, str]]:
"""
- A method to read a local CSV file and return the data as a list of
- dictionaries.
+ Read a local CSV file and return the data as a list of dictionaries.
+
+ The file path should be a local path to the CSV file. There is no logic
+ to convert file paths between operating systems since `read_s3` is the
+ main method to be used. Therefore, this method will only work reliably
+ on macOS and Linux systems.
+
+ This method uses the built-in `open` function to read the CSV file and
+ then reads the CSV data using `read_string` to be returned.
+
+ Args:
+ file_path (str): The local file path to the CSV file
+
+ Returns:
+ List[Dict[str, str]]: A list of dictionaries representing the CSV data rows
"""
with open(file_path, mode="r", encoding="utf-8") as f:
@@ -25,8 +42,18 @@ class FileHandler:
def read_s3(self, file_path) -> List[Dict[str, str]]:
"""
- A method to read an S3 object containing CSV data
- and return the data as a list of dictionaries.
+ Read a CSV file within an S3 bucket and return the data as a list of dictionaries.
+
+ The S3 URI should be in the format "s3://bucket/key". This method uses
+ get_object present in the boto3 library to interact with S3 and retrieve
+ the CSV file. Once retrieved, the CSV data is read using `read_string`
+ and is returned.
+
+ Args:
+ file_path (str): The S3 URI of the CSV file, in the format "s3://bucket/key"
+
+ Returns:
+ List[Dict[str, str]]: A list of dictionaries representing the CSV data rows
"""
bucket, key = self.utils.get_s3_path(file_path)
@@ -40,8 +67,15 @@ class FileHandler:
@staticmethod
def read_string(content: str) -> List[Dict[str, str]]:
"""
- A method to read CSV data from a string and return the data as a list
- of dictionaries.
+ Parse raw data provided by read helpers and return the data as a list of dictionaries.
+
+ If the provided string is empty, an empty list is returned.
+
+ Args:
+ content (str): The raw CSV data as a string
+
+ Returns:
+ List[Dict[str, str]]: A list of dictionaries representing the CSV data rows
"""
if not content.strip():
return []
git.ajschof.me — hosted by ajschofield — powered by cgit