From 8465343ef01fd18fd753efa55d770e66addfb46f Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 21 Feb 2025 00:04:47 +0000 Subject: remove all logging temporarily as it's being a pain --- cli.py | 12 ++---------- gdpr_obfuscator/__init__.py | 6 +----- gdpr_obfuscator/obfuscate.py | 11 ----------- gdpr_obfuscator/read.py | 36 +++++++----------------------------- gdpr_obfuscator/utils.py | 31 +------------------------------ 5 files changed, 11 insertions(+), 85 deletions(-) diff --git a/cli.py b/cli.py index ed00d32..ef77314 100644 --- a/cli.py +++ b/cli.py @@ -1,13 +1,10 @@ import argparse from gdpr_obfuscator.read import DataReader from gdpr_obfuscator.obfuscate import obfuscate -from gdpr_obfuscator.utils import Utilities def main(): - utils = Utilities() - parser = argparse.ArgumentParser( prog="GDPR-Obfuscator", description="Obfuscate sensitive data stored locally or in an AWS environment", @@ -31,21 +28,16 @@ def main(): args = parser.parse_args() - log_level = "DEBUG" if args.verbose else "INFO" - logger = utils.get_logger("CLI", log_level) - - reader = DataReader(log_level) + reader = DataReader() if args.local and not args.s3: - logger.debug("Read data from local path") data = reader.read_local(args.local) else: - logger.debug("Read data from S3") data = reader.read_s3(args.s3) obfuscated_data = obfuscate(data, args.pii) - return obfuscated_data + print(obfuscated_data) if __name__ == "__main__": diff --git a/gdpr_obfuscator/__init__.py b/gdpr_obfuscator/__init__.py index 69854cb..43c1cc0 100644 --- a/gdpr_obfuscator/__init__.py +++ b/gdpr_obfuscator/__init__.py @@ -1,14 +1,10 @@ from .read import DataReader from .obfuscate import obfuscate from typing import List, Dict -from .utils import Utilities class Obfuscator: - def __init__(self, verbosity: bool = False): - self.verbosity = verbosity - self.log_level = "DEBUG" if verbosity else "INFO" - self.logger = Utilities.get_logger("ImportData", self.log_level) + def __init__(self): self.reader = DataReader() def import_s3(self, path: str, pii_fields: List[str]) -> bytes: diff --git a/gdpr_obfuscator/obfuscate.py b/gdpr_obfuscator/obfuscate.py index 6439542..1b6cf0c 100644 --- a/gdpr_obfuscator/obfuscate.py +++ b/gdpr_obfuscator/obfuscate.py @@ -1,7 +1,4 @@ from typing import List, Dict -from .utils import Utilities - -logger = Utilities.get_logger("OBFUSCATE") def obfuscate( @@ -11,14 +8,6 @@ def obfuscate( A function to obfuscate PII fields in a list of dictionaries, replacing sensitive values with a string of asterisks. """ - if not data: - logger.error( - "Invalid or empty data was provided to obfuscate. Returning empty list." - ) - return [] - if not pii_fields: - logger.error("No PII fields provided to obfuscate. Returning data unchanged.") - return data return [ {k: ("***" if k in pii_fields else v) for k, v in record.items()} diff --git a/gdpr_obfuscator/read.py b/gdpr_obfuscator/read.py index 7f6372b..a486447 100644 --- a/gdpr_obfuscator/read.py +++ b/gdpr_obfuscator/read.py @@ -2,7 +2,6 @@ import csv import io import boto3 from typing import List, Dict -from .utils import Utilities class DataReader: @@ -11,27 +10,21 @@ class DataReader: the project completion, support for JSON/Parquet files will be added. """ - def __init__(self, log_level=None): - self.utils = Utilities(self.log_level) - self.log_level = log_level - self.logger = self.utils.get_logger("CSVREADER", log_level) + def __init__(self): + pass def read_local(self, path) -> List[Dict[str, str]]: """ A method to read a local CSV file and return the data as a list of dictionaries. """ - self.logger.debug(f"Reading local CSV from: {path}") try: with open(path, mode="r", encoding="utf-8") as f: reader = csv.DictReader(f) return [dict(row) for row in reader] - except FileNotFoundError: - self.logger.error(f"File not found: {path}") - raise except Exception as e: - self.logger.error(f"Error reading file: {e}") + pass def read_s3(self, path) -> List[Dict[str, str]]: """ @@ -39,28 +32,13 @@ class DataReader: and return the data as a list of dictionaries. """ bucket, key = self.utils.get_s3_path(path) - self.logger.debug(f"Reading S3 CSV from: {bucket}/{key}") client = boto3.client("s3") - try: - response = client.get_object(Bucket=bucket, Key=key) - self.logger.info("S3 object read successfully") - content = response["Body"].read().decode("utf-8") - read_csv_content = self.read_string(content) - return self.utils.create_byte_stream(read_csv_content) - except client.exceptions.NoSuchKey: - self.logger.error(f"Object not found: {bucket}/{key}") - raise - except client.exceptions.ClientError as e: - self.logger.error(f"Error reading S3 object: {e}") - raise - except UnicodeDecodeError as e: - self.logger.error(f"Error decoding S3 object: {e}") - raise - except Exception as e: - self.logger.error(f"Error reading S3 object: {e}") - raise + response = client.get_object(Bucket=bucket, Key=key) + content = response["Body"].read().decode("utf-8") + read_csv_content = self.read_string(content) + return self.utils.create_byte_stream(read_csv_content) def read_string(self, content: str) -> List[Dict[str, str]]: """ diff --git a/gdpr_obfuscator/utils.py b/gdpr_obfuscator/utils.py index 9283d9e..c8aadb2 100644 --- a/gdpr_obfuscator/utils.py +++ b/gdpr_obfuscator/utils.py @@ -1,50 +1,21 @@ import csv import io -import logging from enum import Enum from typing import List, Dict class Utilities: - class LogLevel(Enum): - DEBUG = logging.DEBUG - INFO = logging.INFO - WARNING = logging.WARNING - ERROR = logging.ERROR - CRITICAL = logging.CRITICAL - - @staticmethod - def get_logger(name: str, level: "Utilities.LogLevel" = None) -> logging.Logger: - level = level or Utilities.LogLevel.INFO - logger = logging.getLogger(name) - if logger.hasHandlers(): - logger.handlers.clear() - - handler = logging.StreamHandler() - logger.setLevel(level.value) - formatter = logging.Formatter( - "[%(asctime)s] - %(levelname)s::%(name)s - %(message)s" - ) - handler.setFormatter(formatter) - logger.addHandler(handler) - return logger def __init__(self, logger=None): - self.logger = self.get_logger(__name__, logger) + pass def get_s3_path(self, uri): parts = uri.replace("s3://", "").split("/") - self.logger.debug(f"Parts: {parts}") bucket = parts.pop(0) - self.logger.debug(f"Bucket: {bucket}") key = "/".join(parts) - self.logger.debug(f"Key: {key}") return bucket, key def create_byte_stream(self, data: List[Dict[str, str]]) -> bytes: - if not data: - self.logger.error("Invalid or empty data was provided to write") - return b"" output = io.StringIO() -- cgit v1.2.3