diff options
| author | Alex <git@ajschof.me> | 2025-02-18 23:08:06 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-02-18 23:08:06 +0000 |
| commit | eb0d30d0235dbadd1d5c385a0a49d4cd8aea021e (patch) | |
| tree | 4970d8999b622998800a99e2922b8639049169d9 /obfuscator/csv_reader.py | |
| parent | 362805c9354dc653442f4e144022cc577ebeb43e (diff) | |
| parent | 20572634aaab2b522128a88449b2a32b6c028fc4 (diff) | |
| download | gdpr-obfuscator-eb0d30d0235dbadd1d5c385a0a49d4cd8aea021e.tar.gz gdpr-obfuscator-eb0d30d0235dbadd1d5c385a0a49d4cd8aea021e.zip | |
Merge pull request #6 from ajschofield/feat/read_s3
implement reading from s3 bucket
Diffstat (limited to 'obfuscator/csv_reader.py')
| -rw-r--r-- | obfuscator/csv_reader.py | 35 |
1 file changed, 33 insertions, 2 deletions
```diff
diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py
index eb93609..8f4ebea 100644
--- a/obfuscator/csv_reader.py
+++ b/obfuscator/csv_reader.py
@@ -1,7 +1,10 @@
 import csv
 import io
+import boto3
+import os
 from typing import List, Dict
 from obfuscator.logger import get_logger
+from obfuscator.utils import get_s3_path
 
 # Create the logger
 logger = get_logger("CSVReader")
@@ -47,8 +50,36 @@ class CSVReader:
         A method to read an S3 object containing CSV data and return the data as a list
         of dictionaries.
         """
-        # Yet to be implemented.
-        return []
+        bucket, key = get_s3_path(path)
+        logger.debug(f"Reading S3 CSV from: {bucket}/{key}")
+
+        # If DEBUG=TRUE, use the localstack endpoint for testing
+        if os.getenv("DEBUG", "FALSE").upper() == "TRUE":
+            localstack_endpoint = "http://localhost.localstack.cloud:4566"
+            logger.debug("Using LocalStack endpoint for S3")
+            client = boto3.client(
+                "s3",
+                endpoint_url=localstack_endpoint,
+                aws_access_key_id="dummy",
+                aws_secret_access_key="dummy",
+            )
+            logger.debug(f"endpoint_url: {localstack_endpoint}")
+        else:
+            client = boto3.client("s3")
+
+        try:
+            # Attempt to read the S3 object and return the data as a list of dictionaries
+            response = client.get_object(Bucket=bucket, Key=key)
+            logger.info("S3 object read successfully")
+            # Read and decode the content
+            content = response["Body"].read().decode("utf-8")
+            # Even though the read_string method was only created for testing,
+            # it can be reused here to read and return the CSV data
+            return CSVReader.read_string(content)
+        # TODO: Add more specific exceptions to catch
+        except Exception as e:
+            logger.error(f"Error reading S3 object: {e}")
+            raise
 
     @staticmethod
     def read_string(content: str) -> List[Dict[str, str]]:
```
