From 3c4b66e8490c6fdf93fb8fee735d52c76eb2853b Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 19 Feb 2025 03:22:26 +0000 Subject: remove annoying comments for better readability of code --- test/test_csv_reader.py | 23 ----------------------- test/test_obfuscator.py | 15 --------------- 2 files changed, 38 deletions(-) (limited to 'test') diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index e4c135b..0206542 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -1,6 +1,3 @@ -# csv_reader.py - tests for read_string and read_s3 -# Author: Alex Schofield - import boto3 from moto import mock_aws from obfuscator.csv_reader import CSVReader @@ -8,11 +5,6 @@ import pytest reader = CSVReader() -# CSVREADER: READ_STRING TESTS - -# Check if the function can read a CSV string with no content and return -# an empty list - def test_empty_csv_should_return_no_content(): content = "" @@ -21,10 +13,6 @@ def test_empty_csv_should_return_no_content(): assert result == expected -# Check if the function can read a CSV string with only a header and return -# an empty list - - def test_csv_with_header_only_should_return_no_content(): content = "student_id,name,course\n" result = reader.read_string(content) @@ -32,10 +20,6 @@ def test_csv_with_header_only_should_return_no_content(): assert result == expected -# Check if the function can read a CSV string with valid data and return -# a list of dictionaries - - def test_csv_with_valid_data(): content = ( "student_id,name,course\n" @@ -50,10 +34,6 @@ def test_csv_with_valid_data(): assert result == expected -# Check if the function can read a CSV string with quoted fields and return -# a list of dictionaries with the quoted fields intact - - def test_csv_with_quoted_fields_should_run_as_expected(): content = ( "student_id,name,course\n" @@ -68,9 +48,6 @@ def test_csv_with_quoted_fields_should_run_as_expected(): assert result == expected -# CSVREADER: READ_S3 TESTS - - def setup_s3(s3_client, bucket: str, key: str, content: str): s3_client.create_bucket( Bucket=bucket, diff --git a/test/test_obfuscator.py b/test/test_obfuscator.py index 4f61b16..c77b6b4 100644 --- a/test/test_obfuscator.py +++ b/test/test_obfuscator.py @@ -1,8 +1,5 @@ from obfuscator.obfuscate import obfuscate -# Check if the function does what its supposed to and can obfuscate -# valid PII fields in a list of dictionaries - def test_obfuscate_data_with_valid_pii_fields(): data = [ @@ -39,11 +36,6 @@ def test_obfuscate_data_with_valid_pii_fields(): assert result == expected -# Check if the function can obfuscate data even when some PII -# fields are missing from some of the data, returning a list of dictionaries -# but with the missing PII fields obfuscated and the rest of the data intact - - def test_obfuscate_data_with_missing_pii_field(): data = [ {"student_id": "1234", "name": "John Smith", "course": "Software"}, @@ -69,9 +61,6 @@ def test_obfuscate_data_with_missing_pii_field(): assert result == expected -# Check if the function can handle an empty list of data, returning an empty list - - def test_obfuscate_data_with_no_data(): data = [] pii_fields = ["name", "email_address"] @@ -81,10 +70,6 @@ def test_obfuscate_data_with_no_data(): assert result == expected -# Check if the function can handle an empty list of PII fields, returning the data as is -# without mutating it - - def test_obfuscate_data_with_empty_pii_fields(): data = [ { -- cgit v1.2.3 From 648c3785071adfae163e7ac1321f19188a5e4921 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 19 Feb 2025 03:48:14 +0000 Subject: use already initialised reader object instead of creating new CSVReader instance --- test/test_csv_reader.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'test') diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py index 0206542..d6129e7 100644 --- a/test/test_csv_reader.py +++ b/test/test_csv_reader.py @@ -3,7 +3,7 @@ from moto import mock_aws from obfuscator.csv_reader import CSVReader import pytest -reader = CSVReader() +reader = CSVReader(log_level="DEBUG") def test_empty_csv_should_return_no_content(): @@ -96,7 +96,7 @@ def test_read_s3_empty_csv_returns_empty_list(): setup_s3(s3, bucket, key, csv_content) path = f"s3://{bucket}/{key}" - data = CSVReader.read_s3(path) + data = reader.read_s3(path) assert data == [] @@ -106,7 +106,7 @@ def test_read_s3_nonexistent_bucket_raises_exception(): key = "data/mock.csv" path = f"s3://{bucket}/{key}" with pytest.raises(Exception): - CSVReader.read_s3(path) + reader.read_s3(path) def test_read_s3_nonexistent_key_raises_exception(): @@ -120,7 +120,7 @@ def test_read_s3_nonexistent_key_raises_exception(): key = "data/nonexistent.csv" path = f"s3://{bucket}/{key}" with pytest.raises(Exception): - CSVReader.read_s3(path) + reader.read_s3(path) def test_read_s3_malformed_csv_returns_expected(): @@ -132,7 +132,7 @@ def test_read_s3_malformed_csv_returns_expected(): setup_s3(s3, bucket, key, csv_content) path = f"s3://{bucket}/{key}" - data = CSVReader.read_s3(path) + data = reader.read_s3(path) expected = [{"1234": "5678", "Student 1": "Student 2", "Course 1": "Course 2"}] assert data == expected @@ -152,7 +152,7 @@ def test_read_s3_csv_with_extra_empty_lines(): setup_s3(s3, bucket, key, csv_content) path = f"s3://{bucket}/{key}" - data = CSVReader.read_s3(path) + data = reader.read_s3(path) expected = [ {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, @@ -173,7 +173,7 @@ def test_read_s3_csv_with_whitespace_in_fields(): setup_s3(s3, bucket, key, csv_content) path = f"s3://{bucket}/{key}" - data = CSVReader.read_s3(path) + data = reader.read_s3(path) expected = [ {"student_id": " 1234 ", " name ": " Student 1 ", " course ": " Course 1 "}, {"student_id": "5678", " name ": "Student 2", " course ": "Course 2"}, -- cgit v1.2.3 From ae89b05dbc8feebc1410f39143c0d829f8704235 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 19 Feb 2025 15:37:16 +0000 Subject: rename CSVReader to DataReader & update references --- cli.py | 4 +- obfuscator/csv_reader.py | 89 ----------------------- obfuscator/read.py | 89 +++++++++++++++++++++++ test/test_csv_reader.py | 181 ----------------------------------------------- test/test_read.py | 181 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 272 insertions(+), 272 deletions(-) delete mode 100644 obfuscator/csv_reader.py create mode 100644 obfuscator/read.py delete mode 100644 test/test_csv_reader.py create mode 100644 test/test_read.py (limited to 'test') diff --git a/cli.py b/cli.py index 7ffccd8..5100e2b 100644 --- a/cli.py +++ b/cli.py @@ -1,5 +1,5 @@ import argparse -from obfuscator.csv_reader import CSVReader +from obfuscator.read import DataReader from obfuscator.obfuscate import obfuscate from obfuscator.logger import get_logger from obfuscator.csv_writer import create_byte_stream @@ -32,7 +32,7 @@ def main(): log_level = "DEBUG" if args.verbose else "INFO" logger = get_logger("CLI", log_level) - reader = CSVReader(log_level) + reader = DataReader(log_level) if args.local and not args.s3: logger.debug("Read data from local path") diff --git a/obfuscator/csv_reader.py b/obfuscator/csv_reader.py deleted file mode 100644 index 1f503d7..0000000 --- a/obfuscator/csv_reader.py +++ /dev/null @@ -1,89 +0,0 @@ -import csv -import io -import boto3 -import os -from typing import List, Dict -from obfuscator.logger import get_logger -from obfuscator.utils import Utilities - - -class CSVReader: - """ - A class to read CSV data from a local file, S3 object, or string. Near - the project completion, support for JSON/Parquet files will be added. - """ - - def __init__(self, log_level=None): - self.log_level = log_level - self.logger = get_logger("CSVREADER", log_level) - - def read_local(self, path) -> List[Dict[str, str]]: - """ - A method to read a local CSV file and return the data as a list of - dictionaries. - """ - self.logger.debug(f"Reading local CSV from: {path}") - - try: - with open(path, mode="r", encoding="utf-8") as f: - reader = csv.DictReader(f) - return [dict(row) for row in reader] - except FileNotFoundError: - self.logger.error(f"File not found: {path}") - raise - except Exception as e: - self.logger.error(f"Error reading file: {e}") - - def read_s3(self, path) -> List[Dict[str, str]]: - """ - A method to read an S3 object containing CSV data - and return the data as a list of dictionaries. - """ - utils = Utilities(self.log_level) - bucket, key = utils.get_s3_path(path) - self.logger.debug(f"Reading S3 CSV from: {bucket}/{key}") - - if os.getenv("LOCALSTACK", "FALSE").upper() == "TRUE": - localstack_endpoint = "http://localhost.localstack.cloud:4566" - self.logger.debug( - "Using LocalStack endpoint for S3 - ensure LocalStack is running" - ) - client = boto3.client( - "s3", - endpoint_url=localstack_endpoint, - aws_access_key_id="dummy", - aws_secret_access_key="dummy", - ) - self.logger.debug(f"endpoint_url: {localstack_endpoint}") - else: - client = boto3.client("s3") - - try: - response = client.get_object(Bucket=bucket, Key=key) - self.logger.info("S3 object read successfully") - content = response["Body"].read().decode("utf-8") - return self.read_string(content) - except client.exceptions.NoSuchKey: - self.logger.error(f"Object not found: {bucket}/{key}") - raise - except client.exceptions.ClientError as e: - self.logger.error(f"Error reading S3 object: {e}") - raise - except UnicodeDecodeError as e: - self.logger.error(f"Error decoding S3 object: {e}") - raise - except Exception as e: - self.logger.error(f"Error reading S3 object: {e}") - raise - - def read_string(self, content: str) -> List[Dict[str, str]]: - """ - A method to read CSV data from a string and return the data as a list - of dictionaries. - """ - if not content.strip(): - return [] - - f = io.StringIO(content) - reader = csv.DictReader(f) - return [dict(row) for row in reader] diff --git a/obfuscator/read.py b/obfuscator/read.py new file mode 100644 index 0000000..b704643 --- /dev/null +++ b/obfuscator/read.py @@ -0,0 +1,89 @@ +import csv +import io +import boto3 +import os +from typing import List, Dict +from obfuscator.logger import get_logger +from obfuscator.utils import Utilities + + +class DataReader: + """ + A class to read CSV data from a local file, S3 object, or string. Near + the project completion, support for JSON/Parquet files will be added. + """ + + def __init__(self, log_level=None): + self.log_level = log_level + self.logger = get_logger("CSVREADER", log_level) + + def read_local(self, path) -> List[Dict[str, str]]: + """ + A method to read a local CSV file and return the data as a list of + dictionaries. + """ + self.logger.debug(f"Reading local CSV from: {path}") + + try: + with open(path, mode="r", encoding="utf-8") as f: + reader = csv.DictReader(f) + return [dict(row) for row in reader] + except FileNotFoundError: + self.logger.error(f"File not found: {path}") + raise + except Exception as e: + self.logger.error(f"Error reading file: {e}") + + def read_s3(self, path) -> List[Dict[str, str]]: + """ + A method to read an S3 object containing CSV data + and return the data as a list of dictionaries. + """ + utils = Utilities(self.log_level) + bucket, key = utils.get_s3_path(path) + self.logger.debug(f"Reading S3 CSV from: {bucket}/{key}") + + if os.getenv("LOCALSTACK", "FALSE").upper() == "TRUE": + localstack_endpoint = "http://localhost.localstack.cloud:4566" + self.logger.debug( + "Using LocalStack endpoint for S3 - ensure LocalStack is running" + ) + client = boto3.client( + "s3", + endpoint_url=localstack_endpoint, + aws_access_key_id="dummy", + aws_secret_access_key="dummy", + ) + self.logger.debug(f"endpoint_url: {localstack_endpoint}") + else: + client = boto3.client("s3") + + try: + response = client.get_object(Bucket=bucket, Key=key) + self.logger.info("S3 object read successfully") + content = response["Body"].read().decode("utf-8") + return self.read_string(content) + except client.exceptions.NoSuchKey: + self.logger.error(f"Object not found: {bucket}/{key}") + raise + except client.exceptions.ClientError as e: + self.logger.error(f"Error reading S3 object: {e}") + raise + except UnicodeDecodeError as e: + self.logger.error(f"Error decoding S3 object: {e}") + raise + except Exception as e: + self.logger.error(f"Error reading S3 object: {e}") + raise + + def read_string(self, content: str) -> List[Dict[str, str]]: + """ + A method to read CSV data from a string and return the data as a list + of dictionaries. + """ + if not content.strip(): + return [] + + f = io.StringIO(content) + reader = csv.DictReader(f) + return [dict(row) for row in reader] diff --git a/test/test_csv_reader.py b/test/test_csv_reader.py deleted file mode 100644 index d6129e7..0000000 --- a/test/test_csv_reader.py +++ /dev/null @@ -1,181 +0,0 @@ -import boto3 -from moto import mock_aws -from obfuscator.csv_reader import CSVReader -import pytest - -reader = CSVReader(log_level="DEBUG") - - -def test_empty_csv_should_return_no_content(): - content = "" - result = reader.read_string(content) - expected = [] - assert result == expected - - -def test_csv_with_header_only_should_return_no_content(): - content = "student_id,name,course\n" - result = reader.read_string(content) - expected = [] - assert result == expected - - -def test_csv_with_valid_data(): - content = ( - "student_id,name,course\n" - "1234,Student 1,Course 1\n" - "5678,Student 2,Course 2\n" - ) - result = reader.read_string(content) - expected = [ - {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, - {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, - ] - assert result == expected - - -def test_csv_with_quoted_fields_should_run_as_expected(): - content = ( - "student_id,name,course\n" - '1234,"Student 1","Course 1"\n' - '5678,"Student 2","Course 2"\n' - ) - result = reader.read_string(content) - expected = [ - {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, - {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, - ] - assert result == expected - - -def setup_s3(s3_client, bucket: str, key: str, content: str): - s3_client.create_bucket( - Bucket=bucket, - CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, - ) - s3_client.put_object(Bucket=bucket, Key=key, Body=content) - - -@pytest.fixture(autouse=True) -def s3_client(): - with mock_aws(): - yield boto3.client("s3", "eu-west-2") - - -def test_read_s3_valid_csv_returns_expected(): - with mock_aws(): - s3 = boto3.client("s3", region_name="eu-west-2") - bucket = "test-bucket" - key = "data/mock.csv" - - csv_content = ( - "student_id,name,course\n" - "1234,Student 1,Course 1\n" - "5678,Student 2,Course 2\n" - ) - - setup_s3(s3, bucket, key, csv_content) - path = f"s3://{bucket}/{key}" - - data = reader.read_s3(path) - - expected = [ - {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, - {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, - ] - - assert data == expected - - -def test_read_s3_empty_csv_returns_empty_list(): - with mock_aws(): - s3 = boto3.client("s3", region_name="eu-west-2") - bucket = "empty-bucket" - key = "data/empty.csv" - csv_content = "student_id,name,course\n" - setup_s3(s3, bucket, key, csv_content) - path = f"s3://{bucket}/{key}" - - data = reader.read_s3(path) - assert data == [] - - -def test_read_s3_nonexistent_bucket_raises_exception(): - with mock_aws(): - bucket = "nonexistent-bucket" - key = "data/mock.csv" - path = f"s3://{bucket}/{key}" - with pytest.raises(Exception): - reader.read_s3(path) - - -def test_read_s3_nonexistent_key_raises_exception(): - with mock_aws(): - s3 = boto3.client("s3", region_name="eu-west-2") - bucket = "test-bucket" - s3.create_bucket( - Bucket=bucket, - CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, - ) - key = "data/nonexistent.csv" - path = f"s3://{bucket}/{key}" - with pytest.raises(Exception): - reader.read_s3(path) - - -def test_read_s3_malformed_csv_returns_expected(): - with mock_aws(): - s3 = boto3.client("s3", region_name="eu-west-2") - bucket = "test-bucket" - key = "data/malformed.csv" - csv_content = "1234,Student 1,Course 1\n" "5678,Student 2,Course 2\n" - setup_s3(s3, bucket, key, csv_content) - path = f"s3://{bucket}/{key}" - - data = reader.read_s3(path) - expected = [{"1234": "5678", "Student 1": "Student 2", "Course 1": "Course 2"}] - assert data == expected - - -def test_read_s3_csv_with_extra_empty_lines(): - with mock_aws(): - s3 = boto3.client("s3", region_name="eu-west-2") - bucket = "test-bucket" - key = "data/extra_lines.csv" - csv_content = ( - "student_id,name,course\n" - "1234,Student 1,Course 1\n" - "\n" - "5678,Student 2,Course 2\n" - "\n" - ) - setup_s3(s3, bucket, key, csv_content) - path = f"s3://{bucket}/{key}" - - data = reader.read_s3(path) - expected = [ - {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, - {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, - ] - assert data == expected - - -def test_read_s3_csv_with_whitespace_in_fields(): - with mock_aws(): - s3 = boto3.client("s3", region_name="eu-west-2") - bucket = "test-bucket" - key = "data/whitespace.csv" - csv_content = ( - "student_id, name , course \n" - " 1234 , Student 1 , Course 1 \n" - "5678,Student 2,Course 2\n" - ) - setup_s3(s3, bucket, key, csv_content) - path = f"s3://{bucket}/{key}" - - data = reader.read_s3(path) - expected = [ - {"student_id": " 1234 ", " name ": " Student 1 ", " course ": " Course 1 "}, - {"student_id": "5678", " name ": "Student 2", " course ": "Course 2"}, - ] - assert data == expected diff --git a/test/test_read.py b/test/test_read.py new file mode 100644 index 0000000..903ab5d --- /dev/null +++ b/test/test_read.py @@ -0,0 +1,181 @@ +import boto3 +from moto import mock_aws +from obfuscator.read import CSVReader +import pytest + +reader = CSVReader(log_level="DEBUG") + + +def test_empty_csv_should_return_no_content(): + content = "" + result = reader.read_string(content) + expected = [] + assert result == expected + + +def test_csv_with_header_only_should_return_no_content(): + content = "student_id,name,course\n" + result = reader.read_string(content) + expected = [] + assert result == expected + + +def test_csv_with_valid_data(): + content = ( + "student_id,name,course\n" + "1234,Student 1,Course 1\n" + "5678,Student 2,Course 2\n" + ) + result = reader.read_string(content) + expected = [ + {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, + {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, + ] + assert result == expected + + +def test_csv_with_quoted_fields_should_run_as_expected(): + content = ( + "student_id,name,course\n" + '1234,"Student 1","Course 1"\n' + '5678,"Student 2","Course 2"\n' + ) + result = reader.read_string(content) + expected = [ + {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, + {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, + ] + assert result == expected + + +def setup_s3(s3_client, bucket: str, key: str, content: str): + s3_client.create_bucket( + Bucket=bucket, + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) + s3_client.put_object(Bucket=bucket, Key=key, Body=content) + + +@pytest.fixture(autouse=True) +def s3_client(): + with mock_aws(): + yield boto3.client("s3", "eu-west-2") + + +def test_read_s3_valid_csv_returns_expected(): + with mock_aws(): + s3 = boto3.client("s3", region_name="eu-west-2") + bucket = "test-bucket" + key = "data/mock.csv" + + csv_content = ( + "student_id,name,course\n" + "1234,Student 1,Course 1\n" + "5678,Student 2,Course 2\n" + ) + + setup_s3(s3, bucket, key, csv_content) + path = f"s3://{bucket}/{key}" + + data = reader.read_s3(path) + + expected = [ + {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, + {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, + ] + + assert data == expected + + +def test_read_s3_empty_csv_returns_empty_list(): + with mock_aws(): + s3 = boto3.client("s3", region_name="eu-west-2") + bucket = "empty-bucket" + key = "data/empty.csv" + csv_content = "student_id,name,course\n" + setup_s3(s3, bucket, key, csv_content) + path = f"s3://{bucket}/{key}" + + data = reader.read_s3(path) + assert data == [] + + +def test_read_s3_nonexistent_bucket_raises_exception(): + with mock_aws(): + bucket = "nonexistent-bucket" + key = "data/mock.csv" + path = f"s3://{bucket}/{key}" + with pytest.raises(Exception): + reader.read_s3(path) + + +def test_read_s3_nonexistent_key_raises_exception(): + with mock_aws(): + s3 = boto3.client("s3", region_name="eu-west-2") + bucket = "test-bucket" + s3.create_bucket( + Bucket=bucket, + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) + key = "data/nonexistent.csv" + path = f"s3://{bucket}/{key}" + with pytest.raises(Exception): + reader.read_s3(path) + + +def test_read_s3_malformed_csv_returns_expected(): + with mock_aws(): + s3 = boto3.client("s3", region_name="eu-west-2") + bucket = "test-bucket" + key = "data/malformed.csv" + csv_content = "1234,Student 1,Course 1\n" "5678,Student 2,Course 2\n" + setup_s3(s3, bucket, key, csv_content) + path = f"s3://{bucket}/{key}" + + data = reader.read_s3(path) + expected = [{"1234": "5678", "Student 1": "Student 2", "Course 1": "Course 2"}] + assert data == expected + + +def test_read_s3_csv_with_extra_empty_lines(): + with mock_aws(): + s3 = boto3.client("s3", region_name="eu-west-2") + bucket = "test-bucket" + key = "data/extra_lines.csv" + csv_content = ( + "student_id,name,course\n" + "1234,Student 1,Course 1\n" + "\n" + "5678,Student 2,Course 2\n" + "\n" + ) + setup_s3(s3, bucket, key, csv_content) + path = f"s3://{bucket}/{key}" + + data = reader.read_s3(path) + expected = [ + {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, + {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, + ] + assert data == expected + + +def test_read_s3_csv_with_whitespace_in_fields(): + with mock_aws(): + s3 = boto3.client("s3", region_name="eu-west-2") + bucket = "test-bucket" + key = "data/whitespace.csv" + csv_content = ( + "student_id, name , course \n" + " 1234 , Student 1 , Course 1 \n" + "5678,Student 2,Course 2\n" + ) + setup_s3(s3, bucket, key, csv_content) + path = f"s3://{bucket}/{key}" + + data = reader.read_s3(path) + expected = [ + {"student_id": " 1234 ", " name ": " Student 1 ", " course ": " Course 1 "}, + {"student_id": "5678", " name ": "Student 2", " course ": "Course 2"}, + ] + assert data == expected -- cgit v1.2.3 From 1608d01bb68c1f6292b04c70caa609d34943b371 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 19 Feb 2025 15:37:57 +0000 Subject: rename write function & update references --- obfuscator/csv_writer.py | 23 ------------------- obfuscator/write.py | 23 +++++++++++++++++++ test/test_csv_writer.py | 57 ------------------------------------------------ test/test_write.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+), 80 deletions(-) delete mode 100644 obfuscator/csv_writer.py create mode 100644 obfuscator/write.py delete mode 100644 test/test_csv_writer.py create mode 100644 test/test_write.py (limited to 'test') diff --git a/obfuscator/csv_writer.py b/obfuscator/csv_writer.py deleted file mode 100644 index de7cd4b..0000000 --- a/obfuscator/csv_writer.py +++ /dev/null @@ -1,23 +0,0 @@ -import csv -import io -from typing import List, Dict -from obfuscator.logger import get_logger - -logger = get_logger("CSVWRITER") - - -def create_byte_stream(data: List[Dict[str, str]]) -> bytes: - if not data: - logger.error("Invalid or empty data was provided to write") - - output = io.StringIO() - - headers = list(data[0].keys()) - - writer = csv.DictWriter(output, fieldnames=headers) - writer.writeheader() - writer.writerows(data) - - csv_string = output.getvalue() - - return csv_string.encode("utf-8") diff --git a/obfuscator/write.py b/obfuscator/write.py new file mode 100644 index 0000000..de7cd4b --- /dev/null +++ b/obfuscator/write.py @@ -0,0 +1,23 @@ +import csv +import io +from typing import List, Dict +from obfuscator.logger import get_logger + +logger = get_logger("CSVWRITER") + + +def create_byte_stream(data: List[Dict[str, str]]) -> bytes: + if not data: + logger.error("Invalid or empty data was provided to write") + + output = io.StringIO() + + headers = list(data[0].keys()) + + writer = csv.DictWriter(output, fieldnames=headers) + writer.writeheader() + writer.writerows(data) + + csv_string = output.getvalue() + + return csv_string.encode("utf-8") diff --git a/test/test_csv_writer.py b/test/test_csv_writer.py deleted file mode 100644 index eceac28..0000000 --- a/test/test_csv_writer.py +++ /dev/null @@ -1,57 +0,0 @@ -import io -import csv -from obfuscator.csv_writer import create_byte_stream - - -def csv_bytes_to_list(csv_bytes: bytes): - csv_string = csv_bytes.decode("utf-8") - f = io.StringIO(csv_string) - reader = csv.DictReader(f) - return [dict(row) for row in reader] - - -def test_create_byte_stream_valid_data(): - data = [ - {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, - {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, - ] - csv_bytes = create_byte_stream(data) - result = csv_bytes_to_list(csv_bytes) - assert result == data - - -def test_create_byte_stream_empty_data(): - csv_bytes = create_byte_stream([]) - assert csv_bytes == b"" - - -def test_create_byte_stream_handles_quoted_fields(): - data = [ - {"student_id": "1234", "name": 'Student "One"', "course": "Course, A"}, - {"student_id": "5678", "name": 'Student "Two"', "course": "Course, B"}, - ] - csv_bytes = create_byte_stream(data) - result = csv_bytes_to_list(csv_bytes) - assert result == data - - -def test_create_byte_stream_consistent_header_order(): - data = [ - {"student_id": "1234", "name": "Alice", "course": "Math"}, - {"student_id": "5678", "name": "Bob", "course": "Science"}, - ] - csv_bytes = create_byte_stream(data) - csv_string = csv_bytes.decode("utf-8") - header_line = csv_string.splitlines()[0] - expected_header = ",".join(data[0].keys()) - assert header_line == expected_header - - -def test_create_byte_stream_special_characters(): - data = [ - {"student_id": "1234", "name": "Student 1", "course": "Line1\nLine2"}, - {"student_id": "5678", "name": "Student 2", "course": "Value with, comma"}, - ] - csv_bytes = create_byte_stream(data) - result = csv_bytes_to_list(csv_bytes) - assert result == data diff --git a/test/test_write.py b/test/test_write.py new file mode 100644 index 0000000..eceac28 --- /dev/null +++ b/test/test_write.py @@ -0,0 +1,57 @@ +import io +import csv +from obfuscator.csv_writer import create_byte_stream + + +def csv_bytes_to_list(csv_bytes: bytes): + csv_string = csv_bytes.decode("utf-8") + f = io.StringIO(csv_string) + reader = csv.DictReader(f) + return [dict(row) for row in reader] + + +def test_create_byte_stream_valid_data(): + data = [ + {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, + {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, + ] + csv_bytes = create_byte_stream(data) + result = csv_bytes_to_list(csv_bytes) + assert result == data + + +def test_create_byte_stream_empty_data(): + csv_bytes = create_byte_stream([]) + assert csv_bytes == b"" + + +def test_create_byte_stream_handles_quoted_fields(): + data = [ + {"student_id": "1234", "name": 'Student "One"', "course": "Course, A"}, + {"student_id": "5678", "name": 'Student "Two"', "course": "Course, B"}, + ] + csv_bytes = create_byte_stream(data) + result = csv_bytes_to_list(csv_bytes) + assert result == data + + +def test_create_byte_stream_consistent_header_order(): + data = [ + {"student_id": "1234", "name": "Alice", "course": "Math"}, + {"student_id": "5678", "name": "Bob", "course": "Science"}, + ] + csv_bytes = create_byte_stream(data) + csv_string = csv_bytes.decode("utf-8") + header_line = csv_string.splitlines()[0] + expected_header = ",".join(data[0].keys()) + assert header_line == expected_header + + +def test_create_byte_stream_special_characters(): + data = [ + {"student_id": "1234", "name": "Student 1", "course": "Line1\nLine2"}, + {"student_id": "5678", "name": "Student 2", "course": "Value with, comma"}, + ] + csv_bytes = create_byte_stream(data) + result = csv_bytes_to_list(csv_bytes) + assert result == data -- cgit v1.2.3 From ef3f16de8d93821d54344d5cdd16d8deee0b016c Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 19 Feb 2025 15:40:39 +0000 Subject: wrap write functions in class and update references --- cli.py | 6 ++++-- obfuscator/write.py | 24 ++++++++++++++---------- test/test_write.py | 2 +- 3 files changed, 19 insertions(+), 13 deletions(-) (limited to 'test') diff --git a/cli.py b/cli.py index 5100e2b..bd49707 100644 --- a/cli.py +++ b/cli.py @@ -1,8 +1,8 @@ import argparse from obfuscator.read import DataReader +from obfuscator.write import DataWriter from obfuscator.obfuscate import obfuscate from obfuscator.logger import get_logger -from obfuscator.csv_writer import create_byte_stream def main(): @@ -43,7 +43,9 @@ def main(): obfuscated_data = obfuscate(data, args.pii) - return create_byte_stream(obfuscated_data) + writer = DataWriter() + + return writer.create_byte_stream(obfuscated_data) if __name__ == "__main__": diff --git a/obfuscator/write.py b/obfuscator/write.py index de7cd4b..4081f0f 100644 --- a/obfuscator/write.py +++ b/obfuscator/write.py @@ -6,18 +6,22 @@ from obfuscator.logger import get_logger logger = get_logger("CSVWRITER") -def create_byte_stream(data: List[Dict[str, str]]) -> bytes: - if not data: - logger.error("Invalid or empty data was provided to write") +class DataWriter: + def __init__(self): + pass - output = io.StringIO() + def create_byte_stream(self, data: List[Dict[str, str]]) -> bytes: + if not data: + logger.error("Invalid or empty data was provided to write") - headers = list(data[0].keys()) + output = io.StringIO() - writer = csv.DictWriter(output, fieldnames=headers) - writer.writeheader() - writer.writerows(data) + headers = list(data[0].keys()) - csv_string = output.getvalue() + writer = csv.DictWriter(output, fieldnames=headers) + writer.writeheader() + writer.writerows(data) - return csv_string.encode("utf-8") + csv_string = output.getvalue() + + return csv_string.encode("utf-8") diff --git a/test/test_write.py b/test/test_write.py index eceac28..f339799 100644 --- a/test/test_write.py +++ b/test/test_write.py @@ -1,6 +1,6 @@ import io import csv -from obfuscator.csv_writer import create_byte_stream +from obfuscator.write import create_byte_stream def csv_bytes_to_list(csv_bytes: bytes): -- cgit v1.2.3 From ef204e7edabb7a3de2a747d83ae6a472b692fb38 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 19 Feb 2025 15:43:08 +0000 Subject: partially fix tests by updating references with new class --- test/test_write.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'test') diff --git a/test/test_write.py b/test/test_write.py index f339799..4929b06 100644 --- a/test/test_write.py +++ b/test/test_write.py @@ -1,6 +1,8 @@ import io import csv -from obfuscator.write import create_byte_stream +from obfuscator.write import DataWriter + +writer = DataWriter() def csv_bytes_to_list(csv_bytes: bytes): @@ -15,13 +17,14 @@ def test_create_byte_stream_valid_data(): {"student_id": "1234", "name": "Student 1", "course": "Course 1"}, {"student_id": "5678", "name": "Student 2", "course": "Course 2"}, ] - csv_bytes = create_byte_stream(data) + csv_bytes = writer.create_byte_stream(data) result = csv_bytes_to_list(csv_bytes) assert result == data def test_create_byte_stream_empty_data(): - csv_bytes = create_byte_stream([]) + data = [] + csv_bytes = writer.create_byte_stream(data) assert csv_bytes == b"" @@ -30,7 +33,7 @@ def test_create_byte_stream_handles_quoted_fields(): {"student_id": "1234", "name": 'Student "One"', "course": "Course, A"}, {"student_id": "5678", "name": 'Student "Two"', "course": "Course, B"}, ] - csv_bytes = create_byte_stream(data) + csv_bytes = writer.create_byte_stream(data) result = csv_bytes_to_list(csv_bytes) assert result == data @@ -40,7 +43,7 @@ def test_create_byte_stream_consistent_header_order(): {"student_id": "1234", "name": "Alice", "course": "Math"}, {"student_id": "5678", "name": "Bob", "course": "Science"}, ] - csv_bytes = create_byte_stream(data) + csv_bytes = writer.create_byte_stream(data) csv_string = csv_bytes.decode("utf-8") header_line = csv_string.splitlines()[0] expected_header = ",".join(data[0].keys()) @@ -52,6 +55,6 @@ def test_create_byte_stream_special_characters(): {"student_id": "1234", "name": "Student 1", "course": "Line1\nLine2"}, {"student_id": "5678", "name": "Student 2", "course": "Value with, comma"}, ] - csv_bytes = create_byte_stream(data) + csv_bytes = writer.create_byte_stream(data) result = csv_bytes_to_list(csv_bytes) assert result == data -- cgit v1.2.3 From 16318c2421e8b8f000cd46da0bd306da3783cac3 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 19 Feb 2025 15:44:18 +0000 Subject: fix read tests by updating name of imported DataReader class --- test/test_read.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/test/test_read.py b/test/test_read.py index 903ab5d..de425ce 100644 --- a/test/test_read.py +++ b/test/test_read.py @@ -1,9 +1,9 @@ import boto3 from moto import mock_aws -from obfuscator.read import CSVReader +from obfuscator.read import DataReader import pytest -reader = CSVReader(log_level="DEBUG") +reader = DataReader(log_level="DEBUG") def test_empty_csv_should_return_no_content(): -- cgit v1.2.3