aboutsummaryrefslogtreecommitdiffstats
path: root/obfuscator/read.py
diff options
context:
space:
mode:
author Alex Schofield <git@ajschof.me> 2025-02-20 18:20:10 +0000
committer Alex Schofield <git@ajschof.me> 2025-02-20 18:20:10 +0000
commit 9827fcf1a40b0c4993da3f420177f4e390e038e9 (patch)
tree 43ef86bc81fc1f75465180982aa93280c7257529 /obfuscator/read.py
parent f8e6f1be79a4d6b5262cc3edaad1512cd26337ee (diff)
download gdpr-obfuscator-9827fcf1a40b0c4993da3f420177f4e390e038e9.tar.gz
gdpr-obfuscator-9827fcf1a40b0c4993da3f420177f4e390e038e9.zip
rename folder from obfuscator to gdpr-obfuscator
Diffstat (limited to 'obfuscator/read.py')
-rw-r--r--obfuscator/read.py89
1 files changed, 0 insertions, 89 deletions
diff --git a/obfuscator/read.py b/obfuscator/read.py
deleted file mode 100644
index b704643..0000000
--- a/obfuscator/read.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import csv
-import io
-import boto3
-import os
-from typing import List, Dict
-from obfuscator.logger import get_logger
-from obfuscator.utils import Utilities
-
-
class DataReader:
    """
    Read CSV data from a local file, an S3 object, or an in-memory string.

    Every reader returns the rows as a list of dictionaries, one per CSV
    record, built with ``csv.DictReader`` (so the first record supplies the
    keys). Near the project completion, support for JSON/Parquet files will
    be added.
    """

    def __init__(self, log_level=None):
        """
        Store the log level and create the logger shared by all readers.

        Args:
            log_level: Forwarded to ``get_logger`` here and to ``Utilities``
                in ``read_s3``.
        """
        self.log_level = log_level
        self.logger = get_logger("CSVREADER", log_level)

    def read_local(self, path) -> List[Dict[str, str]]:
        """
        Read a local UTF-8 CSV file and return its rows as dictionaries.

        Args:
            path: Location of the CSV file, passed straight to ``open``.

        Returns:
            One dictionary per data row.

        Raises:
            FileNotFoundError: If ``path`` does not exist.
            Exception: Any other error raised while opening, decoding or
                parsing the file is logged and then re-raised.
        """
        self.logger.debug(f"Reading local CSV from: {path}")

        try:
            # newline="" hands newline handling to the csv module, so line
            # breaks embedded in quoted fields are returned unmodified.
            with open(path, mode="r", encoding="utf-8", newline="") as f:
                reader = csv.DictReader(f)
                return [dict(row) for row in reader]
        except FileNotFoundError:
            self.logger.error(f"File not found: {path}")
            raise
        except Exception as e:
            self.logger.error(f"Error reading file: {e}")
            # Propagate instead of implicitly returning None, which would
            # break the declared return type and hide the failure.
            raise

    def read_s3(self, path) -> List[Dict[str, str]]:
        """
        Read an S3 object containing UTF-8 CSV data and return its rows.

        ``path`` is split into a bucket and a key by
        ``Utilities.get_s3_path``. When the ``LOCALSTACK`` environment
        variable equals ``TRUE`` (case-insensitive) the client targets the
        LocalStack endpoint with dummy credentials; otherwise a default
        ``boto3`` S3 client is used.

        Args:
            path: S3 location understood by ``Utilities.get_s3_path``.

        Returns:
            One dictionary per data row (see ``read_string``).

        Raises:
            Exception: Every failure (missing key, client error, decode
                error, anything else) is logged and re-raised unchanged.
        """
        utils = Utilities(self.log_level)
        bucket, key = utils.get_s3_path(path)
        self.logger.debug(f"Reading S3 CSV from: {bucket}/{key}")

        if os.getenv("LOCALSTACK", "FALSE").upper() == "TRUE":
            localstack_endpoint = "http://localhost.localstack.cloud:4566"
            self.logger.debug(
                "Using LocalStack endpoint for S3 - ensure LocalStack is running"
            )
            client = boto3.client(
                "s3",
                endpoint_url=localstack_endpoint,
                aws_access_key_id="dummy",
                aws_secret_access_key="dummy",
            )
            self.logger.debug(f"endpoint_url: {localstack_endpoint}")
        else:
            client = boto3.client("s3")

        try:
            response = client.get_object(Bucket=bucket, Key=key)
            content = response["Body"].read().decode("utf-8")
            # Report success only once the body has been fetched and decoded.
            self.logger.info("S3 object read successfully")
            return self.read_string(content)
        except client.exceptions.NoSuchKey:
            self.logger.error(f"Object not found: {bucket}/{key}")
            raise
        except client.exceptions.ClientError as e:
            self.logger.error(f"Error reading S3 object: {e}")
            raise
        except UnicodeDecodeError as e:
            self.logger.error(f"Error decoding S3 object: {e}")
            raise
        except Exception as e:
            self.logger.error(f"Error reading S3 object: {e}")
            raise

    def read_string(self, content: str) -> List[Dict[str, str]]:
        """
        Parse CSV text held in a string and return its rows as dictionaries.

        Args:
            content: The CSV text, header record first.

        Returns:
            One dictionary per data row; an empty list when ``content`` is
            empty or contains only whitespace.
        """
        if not content.strip():
            return []

        # newline="" makes StringIO split on \n, \r and \r\n without
        # translating them, so CR-only input parses instead of failing.
        f = io.StringIO(content, newline="")
        reader = csv.DictReader(f)
        return [dict(row) for row in reader]
git.ajschof.me — hosted by ajschofield — powered by cgit