# Post-patch contents of obfuscator/read.py, reconstructed from commit
# ae89b05dbc8feebc1410f39143c0d829f8704235
# ("rename CSVReader to DataReader & update references"). That commit deleted
# obfuscator/csv_reader.py and re-created the same code here with the class
# renamed from CSVReader to DataReader.
import csv
import io
import os
from typing import Dict, List

import boto3

from obfuscator.logger import get_logger
from obfuscator.utils import Utilities


class DataReader:
    """
    Read CSV data from a local file, an S3 object, or an in-memory string.

    Every public reader returns the rows as a list of dictionaries keyed by
    the CSV header row. Only CSV is handled; the original author noted that
    JSON/Parquet support is planned for later.
    """

    def __init__(self, log_level=None):
        """
        Store the log level and create the logger used by every reader.

        Args:
            log_level: Passed through unchanged to ``get_logger`` and, in
                ``read_s3``, to ``Utilities``.
        """
        self.log_level = log_level
        # NOTE(review): the logger is still named after the old CSVReader
        # class. It is left unchanged because renaming it could alter the
        # emitted log records; rename deliberately if that is wanted.
        self.logger = get_logger("CSVREADER", log_level)

    @staticmethod
    def _parse_rows(stream) -> List[Dict[str, str]]:
        """Parse an open text stream of CSV data into one dict per row."""
        return [dict(row) for row in csv.DictReader(stream)]

    def _s3_client(self):
        """
        Build the boto3 S3 client used by ``read_s3``.

        When the ``LOCALSTACK`` environment variable equals ``TRUE``
        (case-insensitive) the client targets the LocalStack endpoint with
        dummy credentials; otherwise boto3's default configuration is used.
        """
        if os.getenv("LOCALSTACK", "FALSE").upper() != "TRUE":
            return boto3.client("s3")

        localstack_endpoint = "http://localhost.localstack.cloud:4566"
        self.logger.debug(
            "Using LocalStack endpoint for S3 - ensure LocalStack is running"
        )
        client = boto3.client(
            "s3",
            endpoint_url=localstack_endpoint,
            aws_access_key_id="dummy",
            aws_secret_access_key="dummy",
        )
        self.logger.debug(f"endpoint_url: {localstack_endpoint}")
        return client

    def read_local(self, path) -> List[Dict[str, str]]:
        """
        Read a local UTF-8 CSV file and return its rows as dictionaries.

        Args:
            path: Path of the CSV file to open.

        Returns:
            One dictionary per data row, keyed by the header row.

        Raises:
            FileNotFoundError: If ``path`` does not exist.
            Exception: Any other error raised while opening or parsing the
                file is logged and re-raised.
        """
        self.logger.debug(f"Reading local CSV from: {path}")

        try:
            # newline="" hands newline handling to the csv module, so line
            # breaks embedded in quoted fields are preserved and the result
            # matches read_string()/read_s3() for the same content.
            with open(path, mode="r", encoding="utf-8", newline="") as f:
                return self._parse_rows(f)
        except FileNotFoundError:
            self.logger.error(f"File not found: {path}")
            raise
        except Exception as e:
            self.logger.error(f"Error reading file: {e}")
            # Re-raise instead of silently returning None, which would break
            # the declared return type and hide the failure from the caller.
            raise

    def read_s3(self, path) -> List[Dict[str, str]]:
        """
        Read an S3 object containing UTF-8 CSV data and return its rows.

        Args:
            path: S3 location of the object. It is split into bucket and key
                by ``Utilities.get_s3_path`` (presumably an ``s3://`` style
                URI - confirm against that helper).

        Returns:
            One dictionary per data row, keyed by the header row.

        Raises:
            client.exceptions.NoSuchKey: If the object does not exist.
            client.exceptions.ClientError: On any other S3 client error.
            UnicodeDecodeError: If the object body is not valid UTF-8.
            Exception: Any other error is logged and re-raised.
        """
        utils = Utilities(self.log_level)
        bucket, key = utils.get_s3_path(path)
        self.logger.debug(f"Reading S3 CSV from: {bucket}/{key}")

        client = self._s3_client()

        try:
            response = client.get_object(Bucket=bucket, Key=key)
            raw = response["Body"].read()
            # Log success only once the body has actually been downloaded.
            self.logger.info("S3 object read successfully")
            content = raw.decode("utf-8")
            return self.read_string(content)
        except client.exceptions.NoSuchKey:
            # Must precede ClientError: NoSuchKey is a more specific case.
            self.logger.error(f"Object not found: {bucket}/{key}")
            raise
        except client.exceptions.ClientError as e:
            self.logger.error(f"Error reading S3 object: {e}")
            raise
        except UnicodeDecodeError as e:
            self.logger.error(f"Error decoding S3 object: {e}")
            raise
        except Exception as e:
            self.logger.error(f"Error reading S3 object: {e}")
            raise

    def read_string(self, content: str) -> List[Dict[str, str]]:
        """
        Parse CSV data held in a string and return its rows as dictionaries.

        Args:
            content: The CSV text, header row first.

        Returns:
            One dictionary per data row, keyed by the header row. ``None``,
            empty and whitespace-only input all yield an empty list.
        """
        if content is None or not content.strip():
            return []

        return self._parse_rows(io.StringIO(content))