about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: deepsource-autofix[bot] <62050782+deepsource-autofix[bot]@users.noreply.github.com> 2024-08-28 09:59:43 +0000
committer: GitHub <noreply@github.com> 2024-08-28 09:59:43 +0000
commit: 3f24ec753902feecec4c17e2877e19853bde1bb2 (patch)
tree: 6abaf8c3c829d221ec5f9d48a25c79066f7a4f31
parent: ad357ff34202827720dc216562dfbb0fbd65c297 (diff)
download: de-project-bentley-3f24ec753902feecec4c17e2877e19853bde1bb2.tar.gz
de-project-bentley-3f24ec753902feecec4c17e2877e19853bde1bb2.zip
style: format code with Autopep8, Black and Ruff Formatter
This commit fixes the style issues introduced in ad357ff according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/105
-rw-r--r-- src/transform_lambda.py 40
-rw-r--r-- tests/test_transform_lambda.py 73
2 files changed, 55 insertions, 58 deletions
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index 9830e0f..3b1e9e6 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -11,6 +11,7 @@ from pg8000.native import Connection, InterfaceError
from datetime import datetime
import io
+
class DBConnectionException(Exception):
"""Wraps pg8000.native Error or DatabaseError."""
@@ -108,7 +109,7 @@ def process_to_parquet_and_upload_to_s3(
immutable_df_dict,
mutable_df_dict,
bucket,
- client=boto3.client("s3")
+ client=boto3.client("s3"),
):
status = {"uploaded": [], "not_uploaded": []}
@@ -117,13 +118,14 @@ def process_to_parquet_and_upload_to_s3(
status["not_uploaded"].append(table_name)
else:
parquet_buffer = io.BytesIO()
-
- df.to_parquet(parquet_buffer, engine="pyarrow") # or engine="fastparquet"
-
+
+ # or engine="fastparquet"
+ df.to_parquet(parquet_buffer, engine="pyarrow")
+
parquet_buffer.seek(0)
-
+
client.upload_fileobj(parquet_buffer, bucket, f"{table_name}.parquet")
-
+
status["uploaded"].append(table_name)
# for table_name, df in mutable_df_dict.items():
@@ -188,23 +190,17 @@ def read_from_s3_subfolder_to_df(tables, bucket, client=boto3.client("s3")):
return table_dfs
-
-
def bucket_name(bucket_prefix, client=boto3.client("s3")):
-
- response = client.list_buckets()
- bucket_filter = [
- bucket["Name"]
- for bucket in response["Buckets"]
- if bucket_prefix in bucket["Name"]
- ]
- if not bucket_filter:
- raise ValueError(f"No bucket found with prefix: {bucket_prefix}")
-
- return bucket_filter[0]
-
-
-
+ response = client.list_buckets()
+ bucket_filter = [
+ bucket["Name"]
+ for bucket in response["Buckets"]
+ if bucket_prefix in bucket["Name"]
+ ]
+ if not bucket_filter:
+ raise ValueError(f"No bucket found with prefix: {bucket_prefix}")
+
+ return bucket_filter[0]
def list_existing_s3_files(bucket_name, client=boto3.client("s3")):
diff --git a/tests/test_transform_lambda.py b/tests/test_transform_lambda.py
index b4836c2..6cf3a09 100644
--- a/tests/test_transform_lambda.py
+++ b/tests/test_transform_lambda.py
@@ -1,7 +1,8 @@
from src.transform_lambda import (
read_from_s3_subfolder_to_df,
list_existing_s3_files,
- bucket_name, process_to_parquet_and_upload_to_s3
+ bucket_name,
+ process_to_parquet_and_upload_to_s3,
)
from moto import mock_aws
import pytest
@@ -33,28 +34,30 @@ def s3_client(aws_credentials):
with mock_aws():
yield boto3.client("s3")
+
@pytest.fixture(scope="class")
def mock_extract_bucket(s3_client):
mock_extract_bucket = s3_client.create_bucket(
- Bucket="dummy_extract_buc",
- CreateBucketConfiguration={"LocationConstraint": "eu-west-2"},
- )
+ Bucket="dummy_extract_buc",
+ CreateBucketConfiguration={"LocationConstraint": "eu-west-2"},
+ )
return mock_extract_bucket
-
+
+
@pytest.fixture(scope="class")
def mock_transform_bucket(s3_client):
mock_transform_bucket = s3_client.create_bucket(
- Bucket="dummy_transform_buc",
- CreateBucketConfiguration={"LocationConstraint": "eu-west-2"},
- )
+ Bucket="dummy_transform_buc",
+ CreateBucketConfiguration={"LocationConstraint": "eu-west-2"},
+ )
return mock_transform_bucket
-
class TestReadFromS3:
# @pytest.mark.skip(reason="The test is broken!")
- def test_returns_dictionary_with_correct_value_pair(self, s3_client, mock_extract_bucket):
-
+ def test_returns_dictionary_with_correct_value_pair(
+ self, s3_client, mock_extract_bucket
+ ):
s3_client.upload_file(
"tests/dummy_identical.csv",
"dummy_extract_buc",
@@ -80,9 +83,13 @@ class TestReadFromS3:
assert result["Foods"].eq(expected_df, axis="columns").all(axis=None)
# @pytest.mark.skip(reason="The test is broken!")
- def test_returns_dictionary_of_dataframes_for_multiple_tables(self, s3_client, mock_extract_bucket):
+ def test_returns_dictionary_of_dataframes_for_multiple_tables(
+ self, s3_client, mock_extract_bucket
+ ):
s3_client.upload_file(
- "tests/dummy_2.csv", "dummy_extract_buc", "Cars/2024/08/21/Cars_14:03:56.csv"
+ "tests/dummy_2.csv",
+ "dummy_extract_buc",
+ "Cars/2024/08/21/Cars_14:03:56.csv",
)
tables = ["Foods", "Cars"]
result = read_from_s3_subfolder_to_df(
@@ -143,30 +150,28 @@ class TestListExistingFiles:
class TestBucketName:
- def test_functions_retrieves__extractbucket(self, mock_extract_bucket, mock_transform_bucket,s3_client):
-
+ def test_functions_retrieves__extractbucket(
+ self, mock_extract_bucket, mock_transform_bucket, s3_client
+ ):
bucket = bucket_name("dummy_extract_buc", s3_client)
assert bucket == "dummy_extract_buc"
+ def test_transform_bucket_name(
+ self, mock_extract_bucket, mock_transform_bucket, s3_client
+ ):
+ bucket2 = bucket_name("dummy_transform_buc", s3_client)
+ assert bucket2 == "dummy_transform_buc"
- def test_transform_bucket_name(self, mock_extract_bucket, mock_transform_bucket, s3_client):
- bucket2 = bucket_name('dummy_transform_buc', s3_client)
- assert bucket2 == 'dummy_transform_buc'
-
-
- def test_recieves_error_when_bucket_doesnt_exist(self, mock_extract_bucket, s3_client):
- s3_client.delete_bucket(Bucket='dummy_extract_buc')
+ def test_recieves_error_when_bucket_doesnt_exist(
+ self, mock_extract_bucket, s3_client
+ ):
+ s3_client.delete_bucket(Bucket="dummy_extract_buc")
with pytest.raises(ValueError):
- bucket_name('dummy_extract_buc', s3_client)
-
-
-
-
+ bucket_name("dummy_extract_buc", s3_client)
class TestProcessToParquetUploadS3:
def test_func_uploads_to_s3(self, mock_transform_bucket, s3_client):
-
expected_cars_df = pd.DataFrame(
np.array(
[
@@ -177,14 +182,10 @@ class TestProcessToParquetUploadS3:
),
columns=["Car_type", "Brand", "Colour"],
)
- mock_dim_dict = {'car_data': expected_cars_df}
-
- response = process_to_parquet_and_upload_to_s3([], mock_dim_dict, {}, mock_transform_bucket, s3_client)
+ mock_dim_dict = {"car_data": expected_cars_df}
+ response = process_to_parquet_and_upload_to_s3(
+ [], mock_dim_dict, {}, mock_transform_bucket, s3_client
+ )
assert response == {"uploaded": ["car_data"], "not_uploaded": []}
-
-
-
-
-
git.ajschof.me — hosted by ajschofield — powered by cgit