diff options
Diffstat (limited to 'obfuscator')
| -rw-r--r-- | obfuscator/__init__.py | 12 | ||||
| -rw-r--r-- | obfuscator/logger.py | 36 | ||||
| -rw-r--r-- | obfuscator/obfuscate.py | 26 | ||||
| -rw-r--r-- | obfuscator/read.py | 89 | ||||
| -rw-r--r-- | obfuscator/utils.py | 16 | ||||
| -rw-r--r-- | obfuscator/write.py | 28 |
6 files changed, 0 insertions, 207 deletions
diff --git a/obfuscator/__init__.py b/obfuscator/__init__.py deleted file mode 100644 index 2ee3b96..0000000 --- a/obfuscator/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from obfuscator.read import DataReader -from obfuscator.write import DataWriter -from obfuscator.obfuscate import obfuscate -from typing import List - - -def main(s3_source: str, pii_fields: List[str], log_level: str = "INFO") -> bytes: - reader = DataReader(log_level) - writer = DataWriter() - data = reader.read_s3(s3_source) - obfuscated_data = obfuscate(data, pii_fields) - return writer.create_byte_stream(obfuscated_data) diff --git a/obfuscator/logger.py b/obfuscator/logger.py deleted file mode 100644 index 140fa8f..0000000 --- a/obfuscator/logger.py +++ /dev/null @@ -1,36 +0,0 @@ -import logging -import os -from enum import Enum - - -class LogLevel(Enum): - DEBUG = logging.DEBUG - INFO = logging.INFO - WARNING = logging.WARNING - ERROR = logging.ERROR - CRITICAL = logging.CRITICAL - - -def get_logger(name: str, level: LogLevel = LogLevel.INFO) -> logging.Logger: - if isinstance(level, str): - try: - level = LogLevel[level.upper()] - except KeyError: - raise ValueError( - f"Invalid log level '{level}'. Choose from: {', '.join(l.name for l in LogLevel)}" - ) - - logger = logging.getLogger(name) - - if logger.hasHandlers(): - logger.handlers.clear() - - handler = logging.StreamHandler() - logger.setLevel(level.value) - formatting = logging.Formatter( - "[%(asctime)s] - %(levelname)s::%(name)s - %(message)s" - ) - handler.setFormatter(formatting) - logger.addHandler(handler) - - return logger diff --git a/obfuscator/obfuscate.py b/obfuscator/obfuscate.py deleted file mode 100644 index cd12b6d..0000000 --- a/obfuscator/obfuscate.py +++ /dev/null @@ -1,26 +0,0 @@ -from typing import List, Dict -from obfuscator.logger import get_logger - -logger = get_logger("OBFUSCATE") - - -def obfuscate( - data: List[Dict[str, str]], pii_fields: List[str] -) -> List[Dict[str, str]]: - """ - A function to obfuscate PII fields in a list of dictionaries, replacing - sensitive values with a string of asterisks. - """ - if not data: - logger.error( - "Invalid or empty data was provided to obfuscate. Returning empty list." - ) - return [] - if not pii_fields: - logger.error("No PII fields provided to obfuscate. Returning data unchanged.") - return data - - return [ - {k: ("***" if k in pii_fields else v) for k, v in record.items()} - for record in data - ] diff --git a/obfuscator/read.py b/obfuscator/read.py deleted file mode 100644 index b704643..0000000 --- a/obfuscator/read.py +++ /dev/null @@ -1,89 +0,0 @@ -import csv -import io -import boto3 -import os -from typing import List, Dict -from obfuscator.logger import get_logger -from obfuscator.utils import Utilities - - -class DataReader: - """ - A class to read CSV data from a local file, S3 object, or string. Near - the project completion, support for JSON/Parquet files will be added. - """ - - def __init__(self, log_level=None): - self.log_level = log_level - self.logger = get_logger("CSVREADER", log_level) - - def read_local(self, path) -> List[Dict[str, str]]: - """ - A method to read a local CSV file and return the data as a list of - dictionaries. - """ - self.logger.debug(f"Reading local CSV from: {path}") - - try: - with open(path, mode="r", encoding="utf-8") as f: - reader = csv.DictReader(f) - return [dict(row) for row in reader] - except FileNotFoundError: - self.logger.error(f"File not found: {path}") - raise - except Exception as e: - self.logger.error(f"Error reading file: {e}") - - def read_s3(self, path) -> List[Dict[str, str]]: - """ - A method to read an S3 object containing CSV data - and return the data as a list of dictionaries. - """ - utils = Utilities(self.log_level) - bucket, key = utils.get_s3_path(path) - self.logger.debug(f"Reading S3 CSV from: {bucket}/{key}") - - if os.getenv("LOCALSTACK", "FALSE").upper() == "TRUE": - localstack_endpoint = "http://localhost.localstack.cloud:4566" - self.logger.debug( - "Using LocalStack endpoint for S3 - ensure LocalStack is running" - ) - client = boto3.client( - "s3", - endpoint_url=localstack_endpoint, - aws_access_key_id="dummy", - aws_secret_access_key="dummy", - ) - self.logger.debug(f"endpoint_url: {localstack_endpoint}") - else: - client = boto3.client("s3") - - try: - response = client.get_object(Bucket=bucket, Key=key) - self.logger.info("S3 object read successfully") - content = response["Body"].read().decode("utf-8") - return self.read_string(content) - except client.exceptions.NoSuchKey: - self.logger.error(f"Object not found: {bucket}/{key}") - raise - except client.exceptions.ClientError as e: - self.logger.error(f"Error reading S3 object: {e}") - raise - except UnicodeDecodeError as e: - self.logger.error(f"Error decoding S3 object: {e}") - raise - except Exception as e: - self.logger.error(f"Error reading S3 object: {e}") - raise - - def read_string(self, content: str) -> List[Dict[str, str]]: - """ - A method to read CSV data from a string and return the data as a list - of dictionaries. - """ - if not content.strip(): - return [] - - f = io.StringIO(content) - reader = csv.DictReader(f) - return [dict(row) for row in reader] diff --git a/obfuscator/utils.py b/obfuscator/utils.py deleted file mode 100644 index 77ca1cf..0000000 --- a/obfuscator/utils.py +++ /dev/null @@ -1,16 +0,0 @@ -# Utility functions -from obfuscator.logger import get_logger - - -class Utilities: - def __init__(self, logger=None): - self.logger = get_logger("UTILITIES", logger) - - def get_s3_path(self, uri): - parts = uri.replace("s3://", "").split("/") - self.logger.debug(f"Parts: {parts}") - bucket = parts.pop(0) - self.logger.debug(f"Bucket: {bucket}") - key = "/".join(parts) - self.logger.debug(f"Key: {key}") - return bucket, key diff --git a/obfuscator/write.py b/obfuscator/write.py deleted file mode 100644 index 451b073..0000000 --- a/obfuscator/write.py +++ /dev/null @@ -1,28 +0,0 @@ -import csv -import io -from typing import List, Dict -from obfuscator.logger import get_logger - -logger = get_logger("CSVWRITER") - - -class DataWriter: - def __init__(self): - pass - - def create_byte_stream(self, data: List[Dict[str, str]]) -> bytes: - if not data: - logger.error("Invalid or empty data was provided to write") - return b"" - - output = io.StringIO() - - headers = list(data[0].keys()) - - writer = csv.DictWriter(output, fieldnames=headers) - writer.writeheader() - writer.writerows(data) - - csv_string = output.getvalue() - - return csv_string.encode("utf-8") |
