From e939b481a53be8e9075efec594b9d9efc6f712b4 Mon Sep 17 00:00:00 2001
From: Alex Schofield
Date: Mon, 3 Mar 2025 00:42:05 +0000
Subject: improve error handling in read_s3()

---
 gdpr_obfuscator/read.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

(limited to 'gdpr_obfuscator/read.py')

diff --git a/gdpr_obfuscator/read.py b/gdpr_obfuscator/read.py
index 1d80718..48e94c6 100644
--- a/gdpr_obfuscator/read.py
+++ b/gdpr_obfuscator/read.py
@@ -59,10 +59,23 @@ class FileHandler:
 
         client = boto3.client("s3")
 
-        response = client.get_object(Bucket=bucket, Key=key)
-        content = response["Body"].read().decode("utf-8")
-        read_csv_content = self.read_string(content)
-        return read_csv_content
+        try:
+            response = client.get_object(Bucket=bucket, Key=key)
+        except client.exceptions.NoSuchKey:
+            raise ValueError(f"File not found in S3 bucket: {bucket}/{key}")
+        except client.exceptions.NoSuchBucket:
+            raise ValueError(f"Bucket not found in S3: {bucket}")
+        except client.exceptions.ClientError as e:
+            raise ValueError(f"Error accessing S3: {e}")
+
+        try:
+            content = response["Body"].read().decode("utf-8")
+        except UnicodeDecodeError:
+            raise ValueError("File is not UTF-8 encoded or malformed")
+        except Exception as e:
+            raise ValueError(f"Error reading file from S3: {e}")
+
+        return self.read_string(content)
 
     @staticmethod
     def read_string(content: str) -> List[Dict[str, str]]:
-- 
cgit v1.2.3