about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author lian-manonog <lian.manonog@gmail.com> 2024-08-23 16:22:52 +0100
committer lian-manonog <lian.manonog@gmail.com> 2024-08-23 16:22:52 +0100
commit a69fe58b47bcc5ad02986bcf404f060774aec9a7 (patch)
tree 2cd62b799bee93156c61024c5e79e99dcaa4aa37
parent 5c236c6ee0d0b7e0612a51bf8eaa59322d192a6e (diff)
download de-project-bentley-a69fe58b47bcc5ad02986bcf404f060774aec9a7.tar.gz
de-project-bentley-a69fe58b47bcc5ad02986bcf404f060774aec9a7.zip
wip: pushing again
-rw-r--r-- src/dataframes.py 12
-rw-r--r-- src/transform_lambda.py 1
-rw-r--r-- tests/test_transform_lambda.py 43
3 files changed, 47 insertions, 9 deletions
diff --git a/src/dataframes.py b/src/dataframes.py
index 684f102..18e1fac 100644
--- a/src/dataframes.py
+++ b/src/dataframes.py
@@ -1,11 +1,11 @@
import pandas as pd
from bs4 import BeautifulSoup
-from src.transform_lambda import read_from_s3_subfolder_to_df, tables
-from src.extract_lambda import extract_bucket
-import json
-import boto3
-import re
-from datetime import datetime as dt
+# from src.transform_lambda import read_from_s3_subfolder_to_df, tables
+# from src.extract_lambda import extract_bucket
+# import json
+# import boto3
+# import re
+# from datetime import datetime as dt
import requests
# Table names:
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index defa15d..7677f66 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -207,5 +207,6 @@ def list_existing_s3_files(bucket_name, client=boto3.client("s3")):
except ClientError as e:
logger.error(f"Error listing S3 objects: {e}")
+ raise e
return existing_files
diff --git a/tests/test_transform_lambda.py b/tests/test_transform_lambda.py
index 37ca08f..06235f7 100644
--- a/tests/test_transform_lambda.py
+++ b/tests/test_transform_lambda.py
@@ -1,12 +1,19 @@
-from src.transform_lambda import read_from_s3_subfolder_to_df
+from src.transform_lambda import read_from_s3_subfolder_to_df, list_existing_s3_files
from moto import mock_aws
import pytest
import pandas as pd
import os
import boto3
+from botocore.exceptions import ClientError
import numpy as np
+# import caplog
+import logging
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
@pytest.fixture(scope="class")
def aws_credentials():
os.environ["AWS_ACCESS_KEY_ID"] = "testing"
@@ -23,7 +30,7 @@ def s3_client(aws_credentials):
class TestReadFromS3:
- @pytest.mark.skip(reason="The test is broken!")
+ # @pytest.mark.skip(reason="The test is broken!")
def test_returns_dictionary_with_correct_value_pair(self, s3_client):
s3_client.create_bucket(
Bucket="dummy_buc",
@@ -53,7 +60,7 @@ class TestReadFromS3:
assert isinstance(result["Foods"], pd.DataFrame)
assert result["Foods"].eq(expected_df, axis="columns").all(axis=None)
- @pytest.mark.skip(reason="The test is broken!")
+ # @pytest.mark.skip(reason="The test is broken!")
def test_returns_dictionary_of_dataframes_for_multiple_tables(self, s3_client):
s3_client.upload_file(
"tests/dummy_2.csv", "dummy_buc", "Cars/2024/08/21/Cars_14:03:56.csv"
@@ -84,3 +91,33 @@ class TestReadFromS3:
assert list(result.keys()) == tables
assert result["Foods"].eq(expected_foods_df, axis="columns").all(axis=None)
assert result["Cars"].eq(expected_cars_df, axis="columns").all(axis=None)
+
+class TestListExistingFiles:
+ def test_functions_receives_error_if_no_bucket(self, s3_client, caplog):
+ caplog.set_level(logging.INFO)
+
+ with pytest.raises(ClientError):
+ list_existing_s3_files('rando_bucket', client=s3_client)
+
+ assert "Error listing S3 objects: An error occurred (NoSuchBucket) when calling the ListObjectsV2 operation: The specified bucket does not exist" in caplog.text
+
+ def test_recieves_logger_error_if_no_files_listed(self, s3_client, caplog):
+ caplog.set_level(logging.INFO)
+
+ s3_client.create_bucket(
+ Bucket='mock_bucket',
+ CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}
+ )
+ response = list_existing_s3_files('mock_bucket', client=s3_client)
+ assert 'The bucket is empty' in caplog.text
+
+ def test_retrieves_existing_files(self, s3_client, caplog):
+ caplog.set_level(logging.INFO)
+
+ s3_client.upload_file(
+ "tests/dummy.txt", 'mock_bucket', "dummy.txt"
+ )
+ result = list_existing_s3_files('mock_bucket', client=s3_client)
+ assert result == ["dummy.txt"]
+
+ \ No newline at end of file
git.ajschof.me — hosted by ajschofield — powered by cgit