From 878d6e0dfd0ce1bfe00ca0bcadce6dd16749e6d3 Mon Sep 17 00:00:00 2001
From: Alex Schofield
Date: Tue, 18 Feb 2025 19:53:46 +0000
Subject: create utility function to parse S3 URI

---
 obfuscator/utils.py | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 obfuscator/utils.py

(limited to 'obfuscator')

diff --git a/obfuscator/utils.py b/obfuscator/utils.py
new file mode 100644
index 0000000..f0174f8
--- /dev/null
+++ b/obfuscator/utils.py
@@ -0,0 +1,8 @@
+# Utility functions
+
+
+def get_s3_path(uri):
+    parts = uri.replace("s3://", "").split("/")
+    bucket = parts.pop(0)
+    key = "/".join(parts)
+    return bucket, key
-- 
cgit v1.2.3


From 2c19a941bb3afe4145761c4c6e54880490160aa2 Mon Sep 17 00:00:00 2001
From: Alex Schofield
Date: Tue, 18 Feb 2025 21:19:51 +0000
Subject: call get_s3_path() in read_s3 and debug log path

---
 obfuscator/csv_reader.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'obfuscator')

diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py
index eb93609..578b96b 100644
--- a/obfuscator/csv_reader.py
+++ b/obfuscator/csv_reader.py
@@ -2,6 +2,7 @@ import csv
 import io
 from typing import List, Dict
 from obfuscator.logger import get_logger
+from obfuscator.utils import get_s3_path
 
 # Create the logger
 logger = get_logger("CSVReader")
@@ -48,7 +49,8 @@ class CSVReader:
         and return the data as a list of dictionaries.
         """
         # Yet to be implemented.
-        return []
+        bucket, key = get_s3_path(path)
+        logger.debug(f"Reading S3 CSV from: {bucket}/{key}")
 
     @staticmethod
     def read_string(content: str) -> List[Dict[str, str]]:
-- 
cgit v1.2.3


From d0a8826f43a48e3db53f4ec3f62d5b6e5f3fd15d Mon Sep 17 00:00:00 2001
From: Alex Schofield
Date: Tue, 18 Feb 2025 22:11:01 +0000
Subject: add logging to utils.py

---
 obfuscator/utils.py | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'obfuscator')

diff --git a/obfuscator/utils.py b/obfuscator/utils.py
index f0174f8..2e4211f 100644
--- a/obfuscator/utils.py
+++ b/obfuscator/utils.py
@@ -1,8 +1,15 @@
 # Utility functions
+from obfuscator.logger import get_logger
+
+# Create the logger
+logger = get_logger("CLI")
 
 
 def get_s3_path(uri):
     parts = uri.replace("s3://", "").split("/")
+    logger.debug(f"Parts: {parts}")
     bucket = parts.pop(0)
+    logger.debug(f"Bucket: {bucket}")
     key = "/".join(parts)
+    logger.debug(f"Key: {key}")
     return bucket, key
-- 
cgit v1.2.3


From 6e8c602b7cce9244e66fb0056eeba5e6ab697e6a Mon Sep 17 00:00:00 2001
From: Alex Schofield
Date: Tue, 18 Feb 2025 22:38:48 +0000
Subject: add untested read_s3 logic to CSVReader

---
 obfuscator/csv_reader.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'obfuscator')

diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py
index 578b96b..c777998 100644
--- a/obfuscator/csv_reader.py
+++ b/obfuscator/csv_reader.py
@@ -1,5 +1,6 @@
 import csv
 import io
+import boto3
 from typing import List, Dict
 from obfuscator.logger import get_logger
 from obfuscator.utils import get_s3_path
@@ -48,10 +49,25 @@ class CSVReader:
         A method to read an S3 object containing CSV data
         and return the data as a list of dictionaries.
         """
-        # Yet to be implemented.
         bucket, key = get_s3_path(path)
         logger.debug(f"Reading S3 CSV from: {bucket}/{key}")
 
+        client = boto3.client("s3")
+
+        try:
+            # Attempt to read the S3 object and return the data as a list of dictionaries
+            response = client.get_object(Bucket=bucket, Key=key)
+            logger.info("S3 object read successfully")
+            # Read and decode the content
+            content = response["Body"].read().decode("utf-8")
+            # Even though the read_string method was only created for testing,
+            # it can be reused here to read and return the CSV data
+            return CSVReader.read_string(content)
+        # TODO: Add more specific exceptions to catch
+        except Exception as e:
+            logger.error(f"Error reading S3 object: {e}")
+            raise
+
     @staticmethod
     def read_string(content: str) -> List[Dict[str, str]]:
         """
-- 
cgit v1.2.3


From 31d01d5efbccbf923e9131c0b67aa916be873e9e Mon Sep 17 00:00:00 2001
From: Alex Schofield
Date: Tue, 18 Feb 2025 22:50:57 +0000
Subject: change boto3 endpoint if debug mode is enabled

should this go here in 'production' code, even though it's a testing tool? this may be changed in the near future. i've just got a gut feeling that this might not be right.
---
 obfuscator/csv_reader.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'obfuscator')

diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py
index c777998..8f4ebea 100644
--- a/obfuscator/csv_reader.py
+++ b/obfuscator/csv_reader.py
@@ -1,6 +1,7 @@
 import csv
 import io
 import boto3
+import os
 from typing import List, Dict
 from obfuscator.logger import get_logger
 from obfuscator.utils import get_s3_path
@@ -52,7 +53,19 @@ class CSVReader:
         bucket, key = get_s3_path(path)
         logger.debug(f"Reading S3 CSV from: {bucket}/{key}")
 
-        client = boto3.client("s3")
+        # If DEBUG=TRUE, use the localstack endpoint for testing
+        if os.getenv("DEBUG", "FALSE").upper() == "TRUE":
+            localstack_endpoint = "http://localhost.localstack.cloud:4566"
+            logger.debug("Using LocalStack endpoint for S3")
+            client = boto3.client(
+                "s3",
+                endpoint_url=localstack_endpoint,
+                aws_access_key_id="dummy",
+                aws_secret_access_key="dummy",
+            )
+            logger.debug(f"endpoint_url: {localstack_endpoint}")
+        else:
+            client = boto3.client("s3")
 
         try:
             # Attempt to read the S3 object and return the data as a list of dictionaries
-- 
cgit v1.2.3