about summary refs log tree commit diff stats
path: root/gdpr_obfuscator
diff options
context:
space:
mode:
Diffstat (limited to 'gdpr_obfuscator')
-rw-r--r-- gdpr_obfuscator/__init__.py 6
-rw-r--r-- gdpr_obfuscator/obfuscate.py 11
-rw-r--r-- gdpr_obfuscator/read.py 36
-rw-r--r-- gdpr_obfuscator/utils.py 31
4 files changed, 9 insertions, 75 deletions
diff --git a/gdpr_obfuscator/__init__.py b/gdpr_obfuscator/__init__.py
index 69854cb..43c1cc0 100644
--- a/gdpr_obfuscator/__init__.py
+++ b/gdpr_obfuscator/__init__.py
@@ -1,14 +1,10 @@
from .read import DataReader
from .obfuscate import obfuscate
from typing import List, Dict
-from .utils import Utilities
class Obfuscator:
- def __init__(self, verbosity: bool = False):
- self.verbosity = verbosity
- self.log_level = "DEBUG" if verbosity else "INFO"
- self.logger = Utilities.get_logger("ImportData", self.log_level)
+ def __init__(self):
self.reader = DataReader()
def import_s3(self, path: str, pii_fields: List[str]) -> bytes:
diff --git a/gdpr_obfuscator/obfuscate.py b/gdpr_obfuscator/obfuscate.py
index 6439542..1b6cf0c 100644
--- a/gdpr_obfuscator/obfuscate.py
+++ b/gdpr_obfuscator/obfuscate.py
@@ -1,7 +1,4 @@
from typing import List, Dict
-from .utils import Utilities
-
-logger = Utilities.get_logger("OBFUSCATE")
def obfuscate(
@@ -11,14 +8,6 @@ def obfuscate(
A function to obfuscate PII fields in a list of dictionaries, replacing
sensitive values with a string of asterisks.
"""
- if not data:
- logger.error(
- "Invalid or empty data was provided to obfuscate. Returning empty list."
- )
- return []
- if not pii_fields:
- logger.error("No PII fields provided to obfuscate. Returning data unchanged.")
- return data
return [
{k: ("***" if k in pii_fields else v) for k, v in record.items()}
diff --git a/gdpr_obfuscator/read.py b/gdpr_obfuscator/read.py
index 7f6372b..a486447 100644
--- a/gdpr_obfuscator/read.py
+++ b/gdpr_obfuscator/read.py
@@ -2,7 +2,6 @@ import csv
import io
import boto3
from typing import List, Dict
-from .utils import Utilities
class DataReader:
@@ -11,27 +10,21 @@ class DataReader:
the project completion, support for JSON/Parquet files will be added.
"""
- def __init__(self, log_level=None):
- self.utils = Utilities(self.log_level)
- self.log_level = log_level
- self.logger = self.utils.get_logger("CSVREADER", log_level)
+ def __init__(self):
+ pass
def read_local(self, path) -> List[Dict[str, str]]:
"""
A method to read a local CSV file and return the data as a list of
dictionaries.
"""
- self.logger.debug(f"Reading local CSV from: {path}")
try:
with open(path, mode="r", encoding="utf-8") as f:
reader = csv.DictReader(f)
return [dict(row) for row in reader]
- except FileNotFoundError:
- self.logger.error(f"File not found: {path}")
- raise
except Exception as e:
- self.logger.error(f"Error reading file: {e}")
+ pass
def read_s3(self, path) -> List[Dict[str, str]]:
"""
@@ -39,28 +32,13 @@ class DataReader:
and return the data as a list of dictionaries.
"""
bucket, key = self.utils.get_s3_path(path)
- self.logger.debug(f"Reading S3 CSV from: {bucket}/{key}")
client = boto3.client("s3")
- try:
- response = client.get_object(Bucket=bucket, Key=key)
- self.logger.info("S3 object read successfully")
- content = response["Body"].read().decode("utf-8")
- read_csv_content = self.read_string(content)
- return self.utils.create_byte_stream(read_csv_content)
- except client.exceptions.NoSuchKey:
- self.logger.error(f"Object not found: {bucket}/{key}")
- raise
- except client.exceptions.ClientError as e:
- self.logger.error(f"Error reading S3 object: {e}")
- raise
- except UnicodeDecodeError as e:
- self.logger.error(f"Error decoding S3 object: {e}")
- raise
- except Exception as e:
- self.logger.error(f"Error reading S3 object: {e}")
- raise
+ response = client.get_object(Bucket=bucket, Key=key)
+ content = response["Body"].read().decode("utf-8")
+ read_csv_content = self.read_string(content)
+ return self.utils.create_byte_stream(read_csv_content)
def read_string(self, content: str) -> List[Dict[str, str]]:
"""
diff --git a/gdpr_obfuscator/utils.py b/gdpr_obfuscator/utils.py
index 9283d9e..c8aadb2 100644
--- a/gdpr_obfuscator/utils.py
+++ b/gdpr_obfuscator/utils.py
@@ -1,50 +1,21 @@
import csv
import io
-import logging
from enum import Enum
from typing import List, Dict
class Utilities:
- class LogLevel(Enum):
- DEBUG = logging.DEBUG
- INFO = logging.INFO
- WARNING = logging.WARNING
- ERROR = logging.ERROR
- CRITICAL = logging.CRITICAL
-
- @staticmethod
- def get_logger(name: str, level: "Utilities.LogLevel" = None) -> logging.Logger:
- level = level or Utilities.LogLevel.INFO
- logger = logging.getLogger(name)
- if logger.hasHandlers():
- logger.handlers.clear()
-
- handler = logging.StreamHandler()
- logger.setLevel(level.value)
- formatter = logging.Formatter(
- "[%(asctime)s] - %(levelname)s::%(name)s - %(message)s"
- )
- handler.setFormatter(formatter)
- logger.addHandler(handler)
- return logger
def __init__(self, logger=None):
- self.logger = self.get_logger(__name__, logger)
+ pass
def get_s3_path(self, uri):
parts = uri.replace("s3://", "").split("/")
- self.logger.debug(f"Parts: {parts}")
bucket = parts.pop(0)
- self.logger.debug(f"Bucket: {bucket}")
key = "/".join(parts)
- self.logger.debug(f"Key: {key}")
return bucket, key
def create_byte_stream(self, data: List[Dict[str, str]]) -> bytes:
- if not data:
- self.logger.error("Invalid or empty data was provided to write")
- return b""
output = io.StringIO()
git.ajschof.me — hosted by ajschofield — powered by cgit