about | summary | refs | log | tree | commit | diff | stats
path: root/obfuscator
diff options
context:
space:
mode:
Diffstat (limited to 'obfuscator')
-rw-r--r-- obfuscator/__init__.py 12
-rw-r--r-- obfuscator/logger.py 36
-rw-r--r-- obfuscator/obfuscate.py 26
-rw-r--r-- obfuscator/read.py 89
-rw-r--r-- obfuscator/utils.py 16
-rw-r--r-- obfuscator/write.py 28
6 files changed, 0 insertions, 207 deletions
diff --git a/obfuscator/__init__.py b/obfuscator/__init__.py
deleted file mode 100644
index 2ee3b96..0000000
--- a/obfuscator/__init__.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from obfuscator.read import DataReader
-from obfuscator.write import DataWriter
-from obfuscator.obfuscate import obfuscate
-from typing import List
-
-
def main(s3_source: str, pii_fields: List[str], log_level: str = "INFO") -> bytes:
    """
    Read a CSV object from S3, mask its PII fields and return it as bytes.

    Args:
        s3_source: Location of the CSV object, handed to ``DataReader.read_s3``.
        pii_fields: Names of the columns whose values must be masked.
        log_level: Level name used to configure the reader's logger.

    Returns:
        The obfuscated records as produced by
        ``DataWriter.create_byte_stream``.
    """
    source_reader, sink = DataReader(log_level), DataWriter()
    masked_records = obfuscate(source_reader.read_s3(s3_source), pii_fields)
    return sink.create_byte_stream(masked_records)
diff --git a/obfuscator/logger.py b/obfuscator/logger.py
deleted file mode 100644
index 140fa8f..0000000
--- a/obfuscator/logger.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import logging
-import os
-from enum import Enum
-
-
class LogLevel(Enum):
    """Log levels accepted by ``get_logger``; values are the stdlib constants."""

    DEBUG = logging.DEBUG
    INFO = logging.INFO
    WARNING = logging.WARNING
    ERROR = logging.ERROR
    CRITICAL = logging.CRITICAL


def get_logger(name: str, level: LogLevel = LogLevel.INFO) -> logging.Logger:
    """
    Return the logger called *name* with exactly one stream handler attached.

    Args:
        name: Logger name passed to ``logging.getLogger``.
        level: A ``LogLevel`` member, a case-insensitive level name such as
            ``"debug"``, or ``None`` to use the default (``LogLevel.INFO``).

    Returns:
        The configured ``logging.Logger``.

    Raises:
        ValueError: If *level* is a string that names no ``LogLevel`` member.
    """
    if level is None:
        # Callers in this package default their level argument to None;
        # without this fallback ``level.value`` below raises AttributeError.
        level = LogLevel.INFO
    elif isinstance(level, str):
        try:
            level = LogLevel[level.upper()]
        except KeyError:
            valid_names = ", ".join(member.name for member in LogLevel)
            raise ValueError(
                f"Invalid log level '{level}'. Choose from: {valid_names}"
            ) from None

    logger = logging.getLogger(name)

    # Drop handlers left by earlier calls for the same name so repeated calls
    # do not emit every message more than once.
    logger.handlers.clear()

    handler = logging.StreamHandler()
    logger.setLevel(level.value)
    formatting = logging.Formatter(
        "[%(asctime)s] - %(levelname)s::%(name)s - %(message)s"
    )
    handler.setFormatter(formatting)
    logger.addHandler(handler)

    return logger
diff --git a/obfuscator/obfuscate.py b/obfuscator/obfuscate.py
deleted file mode 100644
index cd12b6d..0000000
--- a/obfuscator/obfuscate.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from typing import List, Dict
-from obfuscator.logger import get_logger
-
-logger = get_logger("OBFUSCATE")
-
-
def obfuscate(
    data: List[Dict[str, str]], pii_fields: List[str]
) -> List[Dict[str, str]]:
    """
    Mask the PII fields of every record with the fixed string ``"***"``.

    Args:
        data: Records to process, one dictionary per row.
        pii_fields: Keys whose values must be masked.

    Returns:
        A new list of new dictionaries with the listed keys masked. An empty
        list is returned when *data* is empty, and *data* itself is returned
        unchanged when *pii_fields* is empty; both cases are logged as errors.
    """
    if not data:
        logger.error(
            "Invalid or empty data was provided to obfuscate. Returning empty list."
        )
        return []
    if not pii_fields:
        logger.error("No PII fields provided to obfuscate. Returning data unchanged.")
        return data

    # Build the lookup once instead of scanning the list for every key of
    # every record.
    masked_fields = frozenset(pii_fields)
    return [
        {k: ("***" if k in masked_fields else v) for k, v in record.items()}
        for record in data
    ]
diff --git a/obfuscator/read.py b/obfuscator/read.py
deleted file mode 100644
index b704643..0000000
--- a/obfuscator/read.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import csv
-import io
-import boto3
-import os
-from typing import List, Dict
-from obfuscator.logger import get_logger
-from obfuscator.utils import Utilities
-
-
class DataReader:
    """
    Read CSV data from a local file, an S3 object, or a string.

    Every reader returns the rows as a list of dictionaries keyed by the CSV
    header row. Near the project completion, support for JSON/Parquet files
    will be added.
    """

    def __init__(self, log_level=None):
        """
        Args:
            log_level: Level forwarded to ``get_logger`` and ``Utilities``.
                ``None`` falls back to ``"INFO"`` so both always receive a
                concrete level.
        """
        self.log_level = "INFO" if log_level is None else log_level
        self.logger = get_logger("CSVREADER", self.log_level)

    def read_local(self, path) -> List[Dict[str, str]]:
        """
        Read a local CSV file and return its rows as dictionaries.

        Raises:
            FileNotFoundError: If *path* does not exist.
            Exception: Any other failure while opening, decoding or parsing
                the file is logged and re-raised.
        """
        self.logger.debug(f"Reading local CSV from: {path}")

        try:
            # newline="" leaves line-ending handling to the csv module, as its
            # documentation requires for file objects.
            with open(path, mode="r", encoding="utf-8", newline="") as f:
                return [dict(row) for row in csv.DictReader(f)]
        except FileNotFoundError:
            self.logger.error(f"File not found: {path}")
            raise
        except Exception as e:
            self.logger.error(f"Error reading file: {e}")
            # Re-raise like read_s3 does; swallowing the error here would
            # make the method return None instead of a list.
            raise

    def _s3_client(self):
        """Create the S3 client, targeting LocalStack when LOCALSTACK=TRUE."""
        if os.getenv("LOCALSTACK", "FALSE").upper() != "TRUE":
            return boto3.client("s3")

        localstack_endpoint = "http://localhost.localstack.cloud:4566"
        self.logger.debug(
            "Using LocalStack endpoint for S3 - ensure LocalStack is running"
        )
        client = boto3.client(
            "s3",
            endpoint_url=localstack_endpoint,
            aws_access_key_id="dummy",
            aws_secret_access_key="dummy",
        )
        self.logger.debug(f"endpoint_url: {localstack_endpoint}")
        return client

    def read_s3(self, path) -> List[Dict[str, str]]:
        """
        Read an S3 object containing CSV data and return its rows as
        dictionaries.

        Args:
            path: Object location; split into bucket and key by
                ``Utilities.get_s3_path``.

        Raises:
            Exception: Any failure while fetching or decoding the object is
                logged and re-raised unchanged.
        """
        utils = Utilities(self.log_level)
        bucket, key = utils.get_s3_path(path)
        self.logger.debug(f"Reading S3 CSV from: {bucket}/{key}")

        client = self._s3_client()

        try:
            response = client.get_object(Bucket=bucket, Key=key)
            content = response["Body"].read().decode("utf-8")
            # Report success only once the body has been read and decoded.
            self.logger.info("S3 object read successfully")
            return self.read_string(content)
        except client.exceptions.NoSuchKey:
            self.logger.error(f"Object not found: {bucket}/{key}")
            raise
        except client.exceptions.ClientError as e:
            self.logger.error(f"Error reading S3 object: {e}")
            raise
        except UnicodeDecodeError as e:
            self.logger.error(f"Error decoding S3 object: {e}")
            raise
        except Exception as e:
            self.logger.error(f"Error reading S3 object: {e}")
            raise

    def read_string(self, content: str) -> List[Dict[str, str]]:
        """
        Parse CSV data held in a string and return its rows as dictionaries.

        Blank or whitespace-only content yields an empty list.
        """
        if not content.strip():
            return []

        return [dict(row) for row in csv.DictReader(io.StringIO(content))]
diff --git a/obfuscator/utils.py b/obfuscator/utils.py
deleted file mode 100644
index 77ca1cf..0000000
--- a/obfuscator/utils.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Utility functions
-from obfuscator.logger import get_logger
-
-
class Utilities:
    """Helper methods shared by the obfuscator package."""

    def __init__(self, logger=None):
        """
        Args:
            logger: Despite its name, this value is forwarded to
                ``get_logger`` as the log *level*. ``None`` falls back to
                ``"INFO"`` so ``get_logger`` always receives a concrete level.
        """
        self.logger = get_logger("UTILITIES", "INFO" if logger is None else logger)

    def get_s3_path(self, uri):
        """
        Split an S3 URI into its bucket and key.

        Args:
            uri: A location such as ``s3://bucket/path/to/object``; the
                ``s3://`` scheme is optional.

        Returns:
            A ``(bucket, key)`` tuple. The key is empty when *uri* holds only
            a bucket name.
        """
        scheme = "s3://"
        # Strip the scheme only where it leads the URI; replacing it anywhere
        # would corrupt keys that happen to contain the same text.
        location = uri[len(scheme):] if uri.startswith(scheme) else uri
        parts = location.split("/")
        self.logger.debug(f"Parts: {parts}")
        bucket = parts.pop(0)
        self.logger.debug(f"Bucket: {bucket}")
        key = "/".join(parts)
        self.logger.debug(f"Key: {key}")
        return bucket, key
diff --git a/obfuscator/write.py b/obfuscator/write.py
deleted file mode 100644
index 451b073..0000000
--- a/obfuscator/write.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import csv
-import io
-from typing import List, Dict
-from obfuscator.logger import get_logger
-
-logger = get_logger("CSVWRITER")
-
-
class DataWriter:
    """Serialise records to CSV."""

    def create_byte_stream(self, data: List[Dict[str, str]]) -> bytes:
        """
        Render *data* as UTF-8 encoded CSV with a header row.

        Args:
            data: Records to write, one dictionary per row.

        Returns:
            The CSV document as bytes, or ``b""`` (after logging an error)
            when *data* is empty.
        """
        if not data:
            logger.error("Invalid or empty data was provided to write")
            return b""

        # Collect every key in first-seen order so a record carrying a key
        # the first record lacks does not make DictWriter raise ValueError.
        # Keys a record lacks are written as empty cells.
        headers = list(dict.fromkeys(key for record in data for key in record))

        output = io.StringIO()
        writer = csv.DictWriter(output, fieldnames=headers)
        writer.writeheader()
        writer.writerows(data)

        return output.getvalue().encode("utf-8")
git.ajschof.me — hosted by ajschofield — powered by cgit