about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Alex <git@ajschof.me>, 2024-08-19 17:14:19 +0100
committer: GitHub <noreply@github.com>, 2024-08-19 17:14:19 +0100
commit: ed20a8dbeba8b88226e539673a6326e75a73746e (patch)
tree: 52673e56cc1747eb351982accdbdb58611ee4048
parent: 58db3807d686a600177f467c96694bec75f0c466 (diff)
parent: b499d78dc660017694ec683c90aba3f558c00669 (diff)
download: de-project-bentley-ed20a8dbeba8b88226e539673a6326e75a73746e.tar.gz
de-project-bentley-ed20a8dbeba8b88226e539673a6326e75a73746e.zip
Merge pull request #65 from ajschofield/feature/test-process-upload-tables-v2
pr: feature/test process upload tables
-rw-r--r-- src/extract_lambda.py | 11
-rw-r--r-- tests/dummy_identical.csv | 6
-rw-r--r-- tests/test_extract_lambda.py | 41
3 files changed, 49 insertions, 9 deletions
diff --git a/src/extract_lambda.py b/src/extract_lambda.py
index 217efdb..15fe785 100644
--- a/src/extract_lambda.py
+++ b/src/extract_lambda.py
@@ -148,12 +148,13 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")):
WHERE table_schema='public' AND table_type='BASE TABLE';"""
)
for table in tables:
+ print(tables)
table_name = table[0]
rows = db.run(
- f"SELECT * FROM {identifier(table_name)} " "WHERE last_updated >= :latest;",
- latest={datetime.strftime(latest_timestamp, "%H-%m-%d %H:%M:%S")},
+ f"SELECT * FROM {identifier(table_name)} WHERE last_updated >= :latest;",
+ latest={datetime.strftime(latest_timestamp, "%Y-%m-%d %H:%M:%S")},
)
-
+ print("rows", rows)
# Creating a temporary file path and writing the column name to it followed by each row of data
if rows:
csv_file_path = f"/tmp/{table_name}.csv"
@@ -183,7 +184,5 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")):
logger.error(f"Error uploading to S3: {e}")
else:
load_status["no change"].append(table_name)
- logger.info(
- f"No new data in {table_name} name. Latest data retrieved is from {latest_timestamp}."
- )
+ logger.info(f"No new data")
return load_status
diff --git a/tests/dummy_identical.csv b/tests/dummy_identical.csv
index fdd8993..e44e9fc 100644
--- a/tests/dummy_identical.csv
+++ b/tests/dummy_identical.csv
@@ -1,4 +1,4 @@
-Food_type,Flavour,Colour
-Vegetable,Sour,Green
-Berry,Sweet,Red
+Food_type,Flavour,Colour,last_updated
+Vegetable,Sour,Green,2022-11-03 14:20:49.962
+Berry,Sweet,Red,2022-11-03 14:20:49.962
diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py
index a43ae0a..3931cfc 100644
--- a/tests/test_extract_lambda.py
+++ b/tests/test_extract_lambda.py
@@ -14,6 +14,7 @@ from src.extract_lambda import (
DBConnectionException,
lambda_handler,
process_and_upload_tables,
+ retrieve_secrets,
)
@@ -178,3 +179,43 @@ class TestConnectToDatabase:
with pytest.raises(DBConnectionException):
connect_to_database()
assert "Interface error" in caplog.text
+
+
+class TestProcessAndUploadTables:
+ def test_error_process_and_upload_tables(mock_conn, s3_client, caplog):
+ caplog.set_level(logging.INFO)
+
+ # Mock return values for database queries
+ queries = [
+ "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';",
+ "SELECT * FROM Fruits WHERE last_updated > :latest;",
+ "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits';",
+ ]
+ return_values = [
+ [["Fruits"]],
+ [], # No new rows with a more recent last_updated timestamp
+ [["Food_type"], ["Flavour"], ["Colour"], ["last_updated"]],
+ ]
+ vals = dict(zip(queries, return_values))
+
+ # Patch the database connection and set return values for queries
+ with patch("src.extract_lambda.Connection") as mock_db:
+ mock_db().run.side_effect = return_values
+ s3_key = "Fruits/2024/08/15/Fruits_16:46:30.csv"
+ existing_files = {
+ s3_key: "Food_type,Flavour,Colour,last_updated\nVegetable,Sour,Green,2022-11-03 14:20:49.962\nBerry,Sweet,Red,2022-11-03 14:20:49.962"
+ }
+
+ # Simulate S3 bucket and file setup
+ s3_client.create_bucket(
+ Bucket="test_extract_bucket",
+ CreateBucketConfiguration={"LocationConstraint": "eu-west-2"},
+ )
+ s3_client.upload_file(
+ "tests/dummy_identical.csv", "test_extract_bucket", s3_key
+ )
+
+ # Run the process_and_upload_tables function
+ process_and_upload_tables(mock_db(), existing_files, client=s3_client)
+ # Assert that the log contains "No new data"
+ assert "No new data" in caplog.text
git.ajschof.me — hosted by ajschofield — powered by cgit