aboutsummaryrefslogtreecommitdiffstats
path: root/test/test_core.py
blob: bb28607d064db8f99f0d3203d67e45b3688cc6a5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from gdpr_obfuscator import Obfuscator
import pytest
from moto import mock_aws
import boto3
import csv
import random
import json
import time

# Single Obfuscator instance shared by both tests in this module.
obfuscator = Obfuscator()


def setup_s3(s3_client, bucket: str, key: str, content: str):
    """Create ``bucket`` and upload ``content`` into it under ``key``.

    Args:
        s3_client: Client object exposing ``create_bucket`` and ``put_object``.
        bucket: Name of the bucket to create.
        key: Object key the content is stored under.
        content: Body of the uploaded object.
    """
    # The bucket is created with an explicit eu-west-2 location constraint.
    location = {"LocationConstraint": "eu-west-2"}
    s3_client.create_bucket(Bucket=bucket, CreateBucketConfiguration=location)

    upload = {"Bucket": bucket, "Key": key, "Body": content}
    s3_client.put_object(**upload)


@pytest.fixture(autouse=True)
def s3_client():
    """Yield an S3 client for eu-west-2 created while ``mock_aws`` is active.

    The fixture is ``autouse``, so it wraps every test in this module; the
    mock context stays open until the test has finished.
    """
    with mock_aws():
        client = boto3.client("s3", region_name="eu-west-2")
        yield client


def test_imported_module_runs_successfully_with_local_data():
    """Check that obfuscating ``name`` removes an original name from the output.

    The local mock CSV is uploaded to a mocked S3 bucket, ``process_s3`` is
    asked to obfuscate the ``name`` field, and the name taken from one
    randomly chosen row must not appear in the UTF-8 decoded result.
    """
    bucket = "test-bucket"
    key = "data/mock.csv"

    # Open the data file once: parse the rows first, then rewind to capture
    # the raw text that is uploaded to the mocked bucket.
    with open("test/data/mock_data.csv", "r") as f:
        rows = list(csv.DictReader(f))
        f.seek(0)
        csv_content = f.read()

    rand_name = random.choice(rows)["name"]

    json_input = json.dumps(
        {"file_path": f"s3://{bucket}/{key}", "pii_fields": ["name"]}
    )

    with mock_aws():
        s3 = boto3.client("s3", region_name="eu-west-2")
        setup_s3(s3, bucket, key, csv_content)

        # Call the code under test while this mock context is still open, so
        # any S3 access it makes is served by the bucket populated above.
        result = obfuscator.process_s3(json_input)

    result_str = result.decode("utf-8")

    assert rand_name not in result_str


def test_imported_module_completes_in_under_one_minute():
    """Check that ``process_s3`` handles the large dataset in under 60 seconds.

    The large local CSV is uploaded to a mocked S3 bucket and a single
    ``process_s3`` call obfuscating ``full_name`` and ``email_address`` is
    timed; file reading and bucket setup are excluded from the measurement.
    """
    bucket = "test-bucket"
    key = "data/large_dataset.csv"

    with open("test/data/large_dataset.csv", "r") as f:
        csv_content = f.read()

    json_input = json.dumps(
        {
            "file_path": f"s3://{bucket}/{key}",
            "pii_fields": ["full_name", "email_address"],
        }
    )

    with mock_aws():
        s3 = boto3.client("s3", region_name="eu-west-2")
        setup_s3(s3, bucket, key, csv_content)

        # perf_counter is a monotonic clock, so the measured duration cannot
        # be skewed by system clock adjustments during the run.
        start = time.perf_counter()
        obfuscator.process_s3(json_input)
        elapsed = time.perf_counter() - start

    assert elapsed < 60
git.ajschof.me — hosted by ajschofield — powered by cgit