about summary refs log tree commit diff stats
path: root/obfuscator/csv_reader.py
diff options
context:
space:
mode:
author Alex <git@ajschof.me> 2025-02-18 23:08:06 +0000
committer GitHub <noreply@github.com> 2025-02-18 23:08:06 +0000
commit eb0d30d0235dbadd1d5c385a0a49d4cd8aea021e (patch)
tree 4970d8999b622998800a99e2922b8639049169d9 /obfuscator/csv_reader.py
parent 362805c9354dc653442f4e144022cc577ebeb43e (diff)
parent 20572634aaab2b522128a88449b2a32b6c028fc4 (diff)
download gdpr-obfuscator-eb0d30d0235dbadd1d5c385a0a49d4cd8aea021e.tar.gz
gdpr-obfuscator-eb0d30d0235dbadd1d5c385a0a49d4cd8aea021e.zip
Merge pull request #6 from ajschofield/feat/read_s3
implement reading from s3 bucket
Diffstat (limited to 'obfuscator/csv_reader.py')
-rw-r--r-- obfuscator/csv_reader.py 35
1 files changed, 33 insertions, 2 deletions
diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py
index eb93609..8f4ebea 100644
--- a/obfuscator/csv_reader.py
+++ b/obfuscator/csv_reader.py
@@ -1,7 +1,10 @@
import csv
import io
+import boto3
+import os
from typing import List, Dict
from obfuscator.logger import get_logger
+from obfuscator.utils import get_s3_path
# Create the logger
logger = get_logger("CSVReader")
@@ -47,8 +50,36 @@ class CSVReader:
A method to read an S3 object containing CSV data
and return the data as a list of dictionaries.
"""
- # Yet to be implemented.
- return []
+ bucket, key = get_s3_path(path)
+ logger.debug(f"Reading S3 CSV from: {bucket}/{key}")
+
+ # If DEBUG=TRUE, use the localstack endpoint for testing
+ if os.getenv("DEBUG", "FALSE").upper() == "TRUE":
+ localstack_endpoint = "http://localhost.localstack.cloud:4566"
+ logger.debug("Using LocalStack endpoint for S3")
+ client = boto3.client(
+ "s3",
+ endpoint_url=localstack_endpoint,
+ aws_access_key_id="dummy",
+ aws_secret_access_key="dummy",
+ )
+ logger.debug(f"endpoint_url: {localstack_endpoint}")
+ else:
+ client = boto3.client("s3")
+
+ try:
+ # Attempt to read the S3 object and return the data as a list of dictionaries
+ response = client.get_object(Bucket=bucket, Key=key)
+ logger.info("S3 object read successfully")
+ # Read and decode the content
+ content = response["Body"].read().decode("utf-8")
+ # Even though the read_string method was only created for testing,
+ # it can be reused here to read and return the CSV data
+ return CSVReader.read_string(content)
+ # TODO: Add more specific exceptions to catch
+ except Exception as e:
+ logger.error(f"Error reading S3 object: {e}")
+ raise
@staticmethod
def read_string(content: str) -> List[Dict[str, str]]:
git.ajschof.me — hosted by ajschofield — powered by cgit