aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorbulve-ad <78788030+bulve-ad@users.noreply.github.com>2024-08-21 15:51:03 +0100
committerGitHub <noreply@github.com>2024-08-21 15:51:03 +0100
commitce76bbb2b32b58a93d88db4abdb1bbfbf27243ea (patch)
treeb8e77c62b6a2d50ab04215beb54055d14210a423
parentc8e94530b65d6807b2b9bb246a542963839cce9d (diff)
parentd01d3bed939d7a17ea2205af502baeeb35510b5c (diff)
downloadde-project-bentley-ce76bbb2b32b58a93d88db4abdb1bbfbf27243ea.tar.gz
de-project-bentley-ce76bbb2b32b58a93d88db4abdb1bbfbf27243ea.zip
Merge branch 'development' into feature/transform_lambda
-rw-r--r--.deepsource.toml6
-rw-r--r--.github/workflows/deploy.yml42
-rw-r--r--.github/workflows/dev-tests.yml48
-rw-r--r--.gitignore3
-rw-r--r--Makefile80
-rw-r--r--README.md12
-rwxr-xr-xscripts/deploy.sh56
-rwxr-xr-xscripts/make_layer_zip.sh8
-rw-r--r--src/extract_lambda.py131
-rw-r--r--src/load_lambda.py2
-rw-r--r--src/secrets_manager.py48
-rw-r--r--src/transform_lambda.py1
-rw-r--r--terraform/events.tf44
-rw-r--r--terraform/iam.tf48
-rw-r--r--terraform/lambda.tf141
-rw-r--r--terraform/main.tf20
-rw-r--r--terraform/rds.tf70
-rw-r--r--terraform/s3.tf49
-rw-r--r--terraform/vars.tf15
-rw-r--r--test.py0
-rw-r--r--test/test_secrets_manager.py34
-rw-r--r--tests/dummy_identical.csv6
-rw-r--r--tests/test_extract_lambda.py262
-rw-r--r--tests/test_secrets_manager.py37
24 files changed, 676 insertions, 487 deletions
diff --git a/.deepsource.toml b/.deepsource.toml
index a840b78..42d0973 100644
--- a/.deepsource.toml
+++ b/.deepsource.toml
@@ -1,5 +1,7 @@
version = 1
+test_patterns = ["tests/**"]
+
[[analyzers]]
name = "sql"
@@ -22,6 +24,4 @@ name = "black"
name = "autopep8"
[[transformers]]
-name = "ruff"
-
-
+name = "ruff" \ No newline at end of file
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
deleted file mode 100644
index 5672048..0000000
--- a/.github/workflows/deploy.yml
+++ /dev/null
@@ -1,42 +0,0 @@
-name: deploy-terraform
-
-on:
- pull_request:
- branches:
- - main
- push:
- branches:
- - main
-
-
-jobs:
- deploy-terraform:
- name: Deploy Terraform
- runs-on: ubuntu-latest
- #needs: run-checks (must ref on-commit.yml file)
- environment: production
- steps:
- - name: Checkout Repo
- uses: actions/checkout@v4
-
- - name: Install Terraform
- uses: hashicorp/setup-terraform@v3
-
- - name: Configure AWS Credentials
- uses: aws-actions/configure-aws-credentials@v4
- with:
- aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
- aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
- aws-region: ${{ secrets.AWS_REGION }}
-
- - name: Terraform Init
- working-directory: terraform
- run: terraform init
-
- - name: Terraform Plan
- working-directory: terraform
- run: terraform plan
-
- - name: Terraform Apply
- working-directory: terraform
- run: terraform apply --auto-approve
diff --git a/.github/workflows/dev-tests.yml b/.github/workflows/dev-tests.yml
new file mode 100644
index 0000000..d66f1c6
--- /dev/null
+++ b/.github/workflows/dev-tests.yml
@@ -0,0 +1,48 @@
+name: dev-tests
+
+on:
+ pull_request:
+ branches:
+ - development
+ push:
+ branches:
+ - development
+
+jobs:
+ validate-and-test:
+ name: Validate Terraform and Run Tests
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout Repo
+ uses: actions/checkout@v4
+
+ - name: Install Terraform
+ uses: hashicorp/setup-terraform@v3
+
+ - name: Terraform Init
+ working-directory: terraform
+ run: terraform init -backend=false
+
+ - name: Terraform Validate
+ working-directory: terraform
+ run: terraform validate
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.11'
+
+ - name: Install Python dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install pytest pytest-testdox
+ pip install -r requirements.txt
+
+ - name: Run pytest
+ run: pytest tests/ -vvrP --testdox
+ continue-on-error: true
+ id: pytest
+
+ - name: Check on failures
+ if: steps.pytest.outcome == 'failure'
+ run: exit 1
diff --git a/.gitignore b/.gitignore
index bceab93..6aa03fc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,5 +14,4 @@ __pycache__/
# OS-Related Files
.DS_Store
-
-*venv*
+venv \ No newline at end of file
diff --git a/Makefile b/Makefile
deleted file mode 100644
index 077cd98..0000000
--- a/Makefile
+++ /dev/null
@@ -1,80 +0,0 @@
-##############################################
-# #
-# MAKEFILE TO BUILD THE PROJECT #
-# #
-##############################################
-
-PROJECT_NAME = de-project-bentley
-REGION = eu-west-2
-PYTHON_INTERPRETER = python
-WD=$(shell pwd)
-PYTHONPATH=${WD}
-SHELL := /bin/bash
-PROFILE = default
-PIP:=pip
-
-## PYTHON INTERPRETER ENVIRONMENT
-create-environment:
- @echo ">>> About to create environment: $(PROJECT_NAME)..."
- @echo ">>> check python3 version"
- ( \
- $(PYTHON_INTERPRETER) --version; \
- )
- @echo ">>> Setting up VirtualEnv."
- ( \
- $(PIP) install -q virtualenv virtualenvwrapper; \
- virtualenv venv --python=$(PYTHON_INTERPRETER); \
- )
-
-ACTIVATE_ENV := source venv/bin/activate
-
-# Execute python related functionalities from within the project's environment
-define execute_in_env
- $(ACTIVATE_ENV) && $1
-endef
-
-## Build the environment requirements
-requirements: create-environment
- $(call execute_in_env, $(PIP) install -r ./requirements.txt)
-
-# Set Up
-## Install bandit
-bandit:
- $(call execute_in_env, $(PIP) install bandit)
-
-## Install safety
-safety:
- $(call execute_in_env, $(PIP) install safety)
-
-## Install black
-black:
- $(call execute_in_env, $(PIP) install black)
-
-## Install coverage
-coverage:
- $(call execute_in_env, $(PIP) install coverage)
-
-## Set up dev requirements (bandit, safety, black)
-dev-setup: bandit safety black coverage
-
-# Build / Run
-
-## Run the security test (bandit + safety)
-security-test:
- $(call execute_in_env, safety check -r ./requirements.txt)
- $(call execute_in_env, bandit -lll */*.py *c/*/*.py)
-
-## Run the black code check
-run-black:
- $(call execute_in_env, black ./src/*/*.py ./test/*/*.py)
-
-## Run the unit tests
-unit-test:
- $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest -v)
-
-## Run the coverage check
-check-coverage:
- $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest --cov=src test/)
-
-## Run all checks
-run-checks: security-test run-black unit-test check-coverage
diff --git a/README.md b/README.md
index 6bc75dc..7d7e499 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,13 @@
# ToteSys - Data Engineering Project
+[![Python](https://img.shields.io/badge/Python-FFD43B?style=for-the-badge&logo=python&logoColor=blue)](https://www.python.org/)
+[![AWS](https://img.shields.io/badge/Amazon_AWS-FF9900?style=for-the-badge&logo=amazonaws&logoColor=white)](https://aws.amazon.com/)
+[![Terraform](https://img.shields.io/badge/Terraform-7B42BC?style=for-the-badge&logo=terraform&logoColor=white)](https://www.terraform.io/)
+[![Postgresql](https://img.shields.io/badge/PostgreSQL-316192?style=for-the-badge&logo=postgresql&logoColor=white)](https://www.postgresql.org/)
+[![GitHub Actions](https://img.shields.io/badge/GitHub_Actions-2088FF?style=for-the-badge&logo=github-actions&logoColor=white)](https://github.com/features/actions)
+
+[![Terraform Main Deployment Workflow Status](https://img.shields.io/github/actions/workflow/status/ajschofield/de-project-bentley/deploy.yml?branch=main&style=flat-square&label=deploy)](https://github.com/ajschofield/de-project-bentley/actions/workflows/deploy.yml?query=branch%3Amain)
+[![Production Environment Status](https://img.shields.io/github/deployments/ajschofield/de-project-bentley/production?style=flat-square&label=env)](https://github.com/ajschofield/de-project-bentley/deployments/production)
# Summary
The project aims to implement a data platform that can extract data from an
operational database, archive it in a data lake, and make it easily accessible
@@ -13,7 +21,7 @@ The solution showcases our skills in:
- Amazon Web Services (AWS)
- Agile methodologies
-# Main Objective
+# Main Objectives
Our goal is to create a reliable ETL (Extract, Transform, Load) pipeline that
can:
@@ -40,4 +48,4 @@ others.
TBA
# Contributors
-TBA \ No newline at end of file
+TBA
diff --git a/scripts/deploy.sh b/scripts/deploy.sh
new file mode 100755
index 0000000..f631bbc
--- /dev/null
+++ b/scripts/deploy.sh
@@ -0,0 +1,56 @@
+# Deploy Script
+# Description: Deploy and destroy Terraform
+# WARNING: This will most likely destroy any current infrastructure if protections
+# are not in place. Be careful!
+
+# Exit if any command has a non-zero status
+set -e
+
+# Change current directory to terraform folder at the start
+cd ../terraform/
+
+echo "WARNING: This script will destroy any infrastructure for testing."
+echo "It should not be used once a proper deployment has been setup."
+echo "Would you like to continue?"
+
+select yn in "Yes" "No"; do
+ case $yn in
+ Yes)
+ echo "Would you like to destroy the current infrastructure?"
+ select destroy_1 in "Yes" "No"; do
+ case $destroy_1 in
+ Yes)
+ terraform destroy
+ break
+ ;;
+ No)
+ echo "Skipping initial destroy..."
+ break
+ ;;
+ esac
+ done
+
+ terraform apply
+
+ echo "Would you like to destroy the newly-created infrastructure?"
+ select destroy_2 in "Yes" "No"; do
+ case $destroy_2 in
+ Yes)
+ terraform destroy
+ break
+ ;;
+ No)
+ echo "Skipping final destroy... Infrastructure will remain."
+ break
+ ;;
+ esac
+ done
+
+ break
+ ;;
+ No)
+ echo "Operation cancelled..."
+ exit
+ ;;
+ esac
+done
diff --git a/scripts/make_layer_zip.sh b/scripts/make_layer_zip.sh
new file mode 100755
index 0000000..eabe301
--- /dev/null
+++ b/scripts/make_layer_zip.sh
@@ -0,0 +1,8 @@
+# Description: Make the zip file for the layer
+
+cd "$(dirname "$0")/.."
+mkdir -p python/lib/python3.11/site-packages
+pip3 install --upgrade -r requirements.txt -t python/lib/python3.11/site-packages
+rm layer.zip
+zip -r layer.zip python
+rm -r python/
diff --git a/src/extract_lambda.py b/src/extract_lambda.py
index 4168e27..24f0981 100644
--- a/src/extract_lambda.py
+++ b/src/extract_lambda.py
@@ -1,17 +1,24 @@
-from pg8000.native import Connection, InterfaceError, identifier
-import boto3
import csv
-from botocore.exceptions import ClientError
-import logging
import json
-from datetime import datetime
+import logging
import re
+from datetime import datetime
+from io import StringIO
+
+import boto3
+from botocore.exceptions import ClientError
+from pg8000.native import Connection, InterfaceError, identifier
+logger = logging.getLogger(__name__)
-logger = logging.getLogger()
-logger.setLevel(logging.INFO)
+logging.basicConfig(
+ format="{asctime} - {levelname} - {message}",
+ style="{",
+ datefmt="%Y-%m-%d %H:%M",
+ level=logging.DEBUG,
+)
-# DB Exception class
+logging.getLogger("botocore").setLevel(logging.WARNING)
class DBConnectionException(Exception):
@@ -28,6 +35,7 @@ def lambda_handler(event, context):
and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them
it uses 3 helper functions to achieve these 3 functionalities
"""
+ db = None
try:
db = connect_to_database()
existing_files = list_existing_s3_files()
@@ -39,38 +47,44 @@ def lambda_handler(event, context):
"statusCode": 200,
"body": json.dumps("No changes detected, no CSV files were uploaded."),
}
- else:
- return {
- "statusCode": 200,
- "body": json.dumps(
- f"""CSV files processed for {', '.join(any_changes['updated'])} and uploaded successfully.{
- 'The following tables were not updated: '+', '.join(any_changes['no change']) if any_changes['no change'] else ''}"""
- ),
- }
+ return {
+ "statusCode": 200,
+ "body": json.dumps(
+ f"""CSV files processed for {', '.join(any_changes['updated'])} and uploaded successfully.{
+ 'The following tables were not updated: '+', '.join(any_changes['no change']) if any_changes['no change'] else ''}"""
+ ),
+ }
except Exception as e:
- logger.error(f"Error: {e}")
+ logger.error(f"Error: {e}", exc_info=True)
return {"statusCode": 500, "body": json.dumps("Internal server error.")}
finally:
if db:
db.close()
-def retrieve_secrets(
- sm_client=boto3.client("secretsmanager"), secret_name="bentley-secrets"
-):
+def retrieve_secrets():
+ secret_name = "bentley-secrets"
+ region_name = "eu-west-2"
+
+ # Create a Secrets Manager client
+ session = boto3.session.Session()
+ client = session.client(service_name="secretsmanager", region_name=region_name)
+
try:
- response = sm_client.get_secret_value(SecretId=secret_name)
- if "SecretString" in response:
- secret = json.loads(response["SecretString"])
- return secret
+ get_secret_value_response = client.get_secret_value(SecretId=secret_name)
except ClientError as e:
- logger.error(f"Could not retrieve secrets: {e}")
+ logger.error(f"Failed to retrieve secret {secret_name}: {str(e)}")
raise e
+ except KeyError:
+ logger.error(f"Secret {secret_name} does not contain a SecretString")
+ raise ValueError(f"Secret {secret_name} does not contain a SecretString")
+
+ return get_secret_value_response["SecretString"]
def connect_to_database() -> Connection:
try:
- secrets = retrieve_secrets()
+ secrets = json.loads(retrieve_secrets())
host = secrets["host"]
port = secrets["port"]
user = secrets["user"]
@@ -90,6 +104,7 @@ def extract_bucket(client=boto3.client("s3")):
extract_bucket_filter = [
bucket["Name"] for bucket in response["Buckets"] if "extract" in bucket["Name"]
]
+
return extract_bucket_filter[0]
@@ -98,7 +113,7 @@ def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client("s3
results of listing the contents of the s3 bucket, then
returns the populated dictionary
"""
-
+ logging.info("Listing existing S3 files")
existing_files = {}
try:
@@ -115,6 +130,7 @@ def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client("s3
logger.error(f"Error retrieving S3 object {s3_key}: {e}")
else:
logger.error("The bucket is empty")
+ return None
except ClientError as e:
logger.error(f"Error listing S3 objects: {e}")
@@ -122,6 +138,21 @@ def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client("s3
return existing_files
+def get_latest_timestamp(existing_files):
+ if existing_files:
+ all_datetimes = []
+ for file_name in existing_files.keys():
+ match = re.search(r"\/(.+/).+_(.+)\.csv", file_name)
+ if match:
+ datetime_str = "".join(match.group(1, 2))
+ all_datetimes.append(
+ datetime.strptime(datetime_str, "%Y/%m/%d/%H:%M:%S")
+ )
+ return max(all_datetimes) if all_datetimes else datetime.min
+
+ return existing_files
+
+
def process_and_upload_tables(db, existing_files, client=boto3.client("s3")):
"""Creates a list of the tables from a database query and
then selects everything from each table in individual queries
@@ -130,29 +161,35 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")):
to files, or new tables/files it uploads them to the s3 bucket
"""
load_status = {"updated": [], "no change": []}
- # Retrieving the latest file timestamp from S3 extract bucket
- all_datetimes = []
- for file_names in existing_files.keys():
- datetime_str_on_s3 = "".join(
- re.search(r"\/(.+/).+_(.+)\.csv", file_names).group(1, 2)
- )
- all_datetimes.append(datetime.strptime(datetime_str_on_s3, "%Y/%m/%d/%H:%M:%S"))
- latest_timestamp = max(all_datetimes)
+ latest_timestamp = get_latest_timestamp(existing_files)
- # Iterating through tables on the database and retrieving only latest changes vs previous file load
tables = db.run(
"""
- SELECT table_name
- FROM information_schema.tables
- WHERE table_schema='public' AND table_type='BASE TABLE';"""
+ SELECT table_name
+ FROM information_schema.tables
+ WHERE table_schema='public' AND table_name != '_prisma_migrations'
+ AND table_type='BASE TABLE';
+ """
)
+
for table in tables:
table_name = table[0]
- rows = db.run(
- f"SELECT * FROM {identifier(table_name)} " "WHERE last_updated >= :latest;",
- latest={datetime.strftime(latest_timestamp, "%H-%m-%d %H:%M:%S")},
+ base_query = f"""
+ SELECT * FROM {identifier(table_name)}
+ WHERE last_updated >= :latest;
+ """
+ latest = (
+ {
+ datetime.strftime(
+ latest_timestamp if latest_timestamp else datetime(1990, 1, 1),
+ "%Y-%m-%d %H:%M:%S",
+ )
+ },
)
-
+ logger.info(f"Processing table: {table_name}")
+ logger.info(f"Latest timestamp: {latest[0]}")
+ rows = db.run(base_query, latest=latest)
+ logger.debug(f"Rows: {rows}")
# Creating a temporary file path and writing the column name to it followed by each row of data
if rows:
csv_file_path = f"/tmp/{table_name}.csv"
@@ -182,7 +219,9 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")):
logger.error(f"Error uploading to S3: {e}")
else:
load_status["no change"].append(table_name)
- logger.info(
- f"No new data in {table_name} name. Latest data retrieved is from {latest_timestamp}."
- )
+ logger.info(f"No new data")
return load_status
+
+
+if __name__ == "__main__":
+ lambda_handler(None, None)
diff --git a/src/load_lambda.py b/src/load_lambda.py
index 6ee681f..c6a8e60 100644
--- a/src/load_lambda.py
+++ b/src/load_lambda.py
@@ -1,2 +1,2 @@
def lambda_handler():
- pass \ No newline at end of file
+ pass
diff --git a/src/secrets_manager.py b/src/secrets_manager.py
deleted file mode 100644
index c0fb61e..0000000
--- a/src/secrets_manager.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import boto3
-from botocore.exceptions import ClientError
-import json
-
-
-def sm_client():
- sm_client = boto3.client('secretsmanager')
- yield sm_client
-
-def create_secret(sm_client, secret_name, cohort_id, user, password, host, database, port):
- secret = {
- "cohort_id": cohort_id,
- "user": user,
- "password": password,
- "host": host,
- "database": database,
- "port": port
- }
-
- response = sm_client.create_secret(
- Name = secret_name,
- SecretString = json.dumps(secret)
- )
-
- print(response)
- return response
-
-def list_secret(sm_client):
- response = sm_client.list_secrets()
- secret_dict = response['SecretList']
- secret_names = []
- for items in secret_dict:
- secret_names.append(items['Name'])
- print(f'{len(secret_names)} secret(s) available')
- for name in secret_names:
- print(name)
- return secret_names
-
-def retrieve_secrets(sm_client):
- response = sm_client.get_secrets(
-
- )
-
-
-
-#retrieve secret
-#so lambda can access totesy db
-#so lambda connect to the db and then retrieve the data \ No newline at end of file
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index b176ccc..9238180 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,4 +1,3 @@
-# from src.extract_lambda import extract_bucket
import json
import boto3
import re
diff --git a/terraform/events.tf b/terraform/events.tf
index 263141f..53ae10a 100644
--- a/terraform/events.tf
+++ b/terraform/events.tf
@@ -1,3 +1,7 @@
+#################
+# Random String #
+#################
+
resource "random_string" "eventbridge_suffix" {
length = 8
special = false
@@ -16,6 +20,10 @@ resource "random_string" "s3_transform_suffix" {
upper = false
}
+#############################
+# EventBridge Configuration #
+#############################
+
resource "aws_cloudwatch_event_rule" "lambda_trigger" {
name = "lambda-scheduled-trigger"
description = "Schedule to trigger the Lambda function"
@@ -25,7 +33,7 @@ resource "aws_cloudwatch_event_rule" "lambda_trigger" {
resource "aws_cloudwatch_event_target" "extract_lambda_cw_event" {
rule = aws_cloudwatch_event_rule.lambda_trigger.name
target_id = "TargetFunctionV1"
- arn = aws_lambda_function.extract_lambda.arn #replaced lambda name placeholder
+ arn = aws_lambda_function.extract_lambda.arn
depends_on = [aws_lambda_permission.allow_eventbridge]
}
@@ -37,54 +45,64 @@ resource "aws_lambda_permission" "allow_eventbridge" {
source_arn = aws_cloudwatch_event_rule.lambda_trigger.arn
lifecycle {
- replace_triggered_by = [random_string.eventbridge_suffix]
+ create_before_destroy = true
+ replace_triggered_by = [random_string.eventbridge_suffix]
}
}
-# below is step function 1
+########################################
+# S3 Extract Bucket Notification Setup #
+########################################
+
resource "aws_lambda_permission" "allow_s3_ingestion" {
statement_id = "AllowS3InvokeLambdaTransform${random_string.s3_ingestion_suffix.result}"
action = "lambda:InvokeFunction"
- function_name = aws_lambda_function.transform_lambda.function_name #replaced lambda name placeholder
+ function_name = aws_lambda_function.transform_lambda.function_name
principal = "s3.amazonaws.com"
- source_arn = aws_s3_bucket.extract_bucket.arn #replaced bucket name placeholder
+ source_arn = aws_s3_bucket.extract_bucket.arn
lifecycle {
- replace_triggered_by = [random_string.s3_ingestion_suffix]
+ create_before_destroy = true
+ replace_triggered_by = [random_string.s3_ingestion_suffix]
}
}
resource "aws_s3_bucket_notification" "extract_bucket_notification" {
- bucket = aws_s3_bucket.extract_bucket.id #replaced bucket name placeholder
+ bucket = aws_s3_bucket.extract_bucket.id
lambda_function {
events = ["s3:ObjectCreated:*"]
- lambda_function_arn = aws_lambda_function.transform_lambda.arn #replaced lambda name placeholder
+ lambda_function_arn = aws_lambda_function.transform_lambda.arn
}
depends_on = [aws_lambda_permission.allow_s3_ingestion]
}
+##########################################
+# S3 Transform Bucket Notification Setup #
+##########################################
+
resource "aws_lambda_permission" "allow_s3_transform_bucket" {
statement_id = "AllowS3InvokeLambdaTransform${random_string.s3_transform_suffix.result}"
action = "lambda:InvokeFunction"
- function_name = aws_lambda_function.transform_lambda.function_name #replaced lambda name placeholder
+ function_name = aws_lambda_function.transform_lambda.function_name
principal = "s3.amazonaws.com"
- source_arn = aws_s3_bucket.transform_bucket.arn #replaced bucket name placeholder
+ source_arn = aws_s3_bucket.transform_bucket.arn
lifecycle {
- replace_triggered_by = [random_string.s3_transform_suffix]
+ create_before_destroy = true
+ replace_triggered_by = [random_string.s3_transform_suffix]
}
}
resource "aws_s3_bucket_notification" "transform_bucket_notification" {
- bucket = aws_s3_bucket.transform_bucket.id #replaced bucket name placeholder
+ bucket = aws_s3_bucket.transform_bucket.id
lambda_function {
events = ["s3:ObjectCreated:*"]
- lambda_function_arn = aws_lambda_function.transform_lambda.arn #replaced lambda name placeholder
+ lambda_function_arn = aws_lambda_function.transform_lambda.arn
}
depends_on = [aws_lambda_permission.allow_s3_transform_bucket]
diff --git a/terraform/iam.tf b/terraform/iam.tf
index 0e5fa6d..3d62b69 100644
--- a/terraform/iam.tf
+++ b/terraform/iam.tf
@@ -28,17 +28,21 @@ resource "aws_iam_role" "multi_service_role" {
########################################################################
# S3 SETUP
# Description: allows allows retention/tagging/access control settings
-# Lambda IAM Policy for S3 Write
+# Lambda IAM Policy for S3
########################################################################
# S3 DEFINE POLICY
data "aws_iam_policy_document" "s3_data_policy_doc" {
statement {
+ effect = "Allow"
actions = [
"s3:PutObject",
"s3:PutObjectRetention",
"s3:PutObjectTagging",
- "s3:PutObjectAcl"
+ "s3:PutObjectAcl",
+ "s3:ListObjects",
+ "s3:ListObjectsV2",
+ "s3:GetObject"
]
resources = [
"${aws_s3_bucket.extract_bucket.arn}/*",
@@ -46,6 +50,19 @@ data "aws_iam_policy_document" "s3_data_policy_doc" {
"${aws_s3_bucket.lambda_code_bucket.arn}/*",
]
}
+
+ statement {
+ effect = "Allow"
+ actions = [
+ "s3:ListBucket",
+ "s3:ListAllMyBuckets",
+ "s3:ListObjectsV2",
+ "s3:ListObjects"
+ ]
+ resources = [
+ "arn:aws:s3:::*",
+ ]
+ }
}
@@ -156,3 +173,30 @@ resource "aws_iam_role_policy_attachment" "cloudwatch_events_attachment" {
role = aws_iam_role.multi_service_role.name
policy_arn = aws_iam_policy.cloudwatch_events_policy.arn
}
+
+#########################
+# SECRETS MANAGER SETUP #
+#########################
+
+# Policy Doc
+data "aws_iam_policy_document" "secrets_manager_policy_doc" {
+ statement {
+ effect = "Allow"
+ actions = [
+ "secretsmanager:GetSecretValue"
+ ]
+ resources = ["arn:aws:secretsmanager:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:secret:bentley-secrets-Na0yc8"]
+ }
+}
+
+# SM Policy Resource
+resource "aws_iam_policy" "secrets_manager_policy" {
+ name = "secrets_manager_policy"
+ policy = data.aws_iam_policy_document.secrets_manager_policy_doc.json
+}
+
+# Attach SM Policy to Role
+resource "aws_iam_role_policy_attachment" "secrets_manager_attachment" {
+ role = aws_iam_role.multi_service_role.name
+ policy_arn = aws_iam_policy.secrets_manager_policy.arn
+}
diff --git a/terraform/lambda.tf b/terraform/lambda.tf
index 67fd6eb..d33a6c9 100644
--- a/terraform/lambda.tf
+++ b/terraform/lambda.tf
@@ -1,4 +1,54 @@
-# Extract Lambda Function
+####################
+# Common Variables #
+####################
+
+locals {
+ layer_dir = "../"
+ layer_zip = "layer.zip"
+ layer_name = "lambda_layer"
+ script_dir = "../scripts"
+ layer_zip_path = "${local.layer_dir}/${local.layer_zip}"
+}
+
+######################
+# Lambda Layer Setup #
+######################
+
+resource "null_resource" "prepare_layer" {
+
+ # New change: only run the script if the layer zip does not exist
+
+ triggers = {
+ layer_zip_exists = fileexists(local.layer_zip_path) ? "exists" : "not_exists"
+ }
+
+ provisioner "local-exec" {
+ command = "if [ ! -f ${local.layer_zip_path} ]; then bash ${local.script_dir}/make_layer_zip.sh; fi"
+ }
+}
+
+resource "aws_s3_object" "lambda_layer_zip" {
+ bucket = aws_s3_bucket.lambda_code_bucket.id #bucket instead of id
+ key = "${local.layer_name}/${local.layer_zip}"
+ source = "${local.layer_dir}/${local.layer_zip}"
+ depends_on = [null_resource.prepare_layer]
+ etag = fileexists(local.layer_zip_path) ? filemd5(local.layer_zip_path) : null
+}
+
+resource "aws_lambda_layer_version" "lambda_layer" {
+ layer_name = local.layer_name
+ compatible_runtimes = ["python3.11"]
+ s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket
+ s3_key = aws_s3_object.lambda_layer_zip.key
+ source_code_hash = fileexists(local.layer_zip_path) ? filebase64sha256(local.layer_zip_path) : null
+ skip_destroy = true
+ depends_on = [aws_s3_object.lambda_layer_zip]
+}
+
+###########################
+# Extract Lambda Function #
+###########################
+
data "archive_file" "extract_lambda_zip" {
type = "zip"
source_file = "${path.module}/../src/extract_lambda.py"
@@ -12,12 +62,15 @@ resource "aws_s3_object" "extract_lambda_code" {
}
resource "aws_lambda_function" "extract_lambda" {
- function_name = var.extract_lambda_name
- s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket
- s3_key = aws_s3_object.extract_lambda_code.key
- role = aws_iam_role.multi_service_role.arn
- handler = "extract_lambda.extract"
- runtime = "python3.11"
+ function_name = var.extract_lambda_name
+ s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket
+ s3_key = aws_s3_object.extract_lambda_code.key
+ layers = [aws_lambda_layer_version.lambda_layer.arn]
+ role = aws_iam_role.multi_service_role.arn
+ handler = "extract_lambda.lambda_handler"
+ runtime = "python3.11"
+ source_code_hash = data.archive_file.extract_lambda_zip.output_base64sha256
+ timeout = 180
lifecycle {
create_before_destroy = true
@@ -26,7 +79,10 @@ resource "aws_lambda_function" "extract_lambda" {
depends_on = [aws_s3_object.extract_lambda_code]
}
-# Transform Lambda Function
+#############################
+# Transform Lambda Function #
+#############################
+
data "archive_file" "transform_lambda_zip" {
type = "zip"
source_file = "${path.module}/../src/transform_lambda.py"
@@ -40,12 +96,15 @@ resource "aws_s3_object" "transform_lambda_code" {
}
resource "aws_lambda_function" "transform_lambda" {
- function_name = var.transform_lambda_name
- s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket
- s3_key = aws_s3_object.transform_lambda_code.key
- role = aws_iam_role.multi_service_role.arn
- handler = "transform_lambda.transform"
- runtime = "python3.11"
+ function_name = var.transform_lambda_name
+ s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket
+ s3_key = aws_s3_object.transform_lambda_code.key
+ layers = [aws_lambda_layer_version.lambda_layer.arn]
+ role = aws_iam_role.multi_service_role.arn
+ handler = "transform_lambda.lambda_handler"
+ runtime = "python3.11"
+ source_code_hash = data.archive_file.transform_lambda_zip.output_base64sha256
+ timeout = 180
lifecycle {
create_before_destroy = true
@@ -54,7 +113,10 @@ resource "aws_lambda_function" "transform_lambda" {
depends_on = [aws_s3_object.transform_lambda_code]
}
-# Load Lambda Function
+########################
+# Load Lambda Function #
+########################
+
data "archive_file" "load_lambda_zip" {
type = "zip"
source_file = "${path.module}/../src/load_lambda.py"
@@ -68,12 +130,15 @@ resource "aws_s3_object" "load_lambda_code" {
}
resource "aws_lambda_function" "load_lambda" {
- function_name = var.load_lambda_name
- s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket
- s3_key = aws_s3_object.load_lambda_code.key
- role = aws_iam_role.multi_service_role.arn
- handler = "load_lambda.load"
- runtime = "python3.11"
+ function_name = var.load_lambda_name
+ s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket
+ s3_key = aws_s3_object.load_lambda_code.key
+ layers = [aws_lambda_layer_version.lambda_layer.arn]
+ role = aws_iam_role.multi_service_role.arn
+ handler = "load_lambda.lambda_handler"
+ runtime = "python3.11"
+ source_code_hash = data.archive_file.load_lambda_zip.output_base64sha256
+ timeout = 180
lifecycle {
create_before_destroy = true
@@ -82,37 +147,3 @@ resource "aws_lambda_function" "load_lambda" {
depends_on = [aws_s3_object.load_lambda_code]
}
-locals {
- layer_dir = "${path.module}/.."
- requirements = "${path.module}/../requirements.txt"
- layer_zip = "${path.module}/../layer.zip"
-}
-
-resource "null_resource" "prepare_layer" {
- triggers = {
- requirements_hash = filesha1(local.requirements)
- }
- provisioner "local-exec" {
- command = <<EOT
- mkdir -p ${local.layer_dir}/python/lib/python3.11/site-packages/
- pip install -r ${local.requirements} -t ${local.layer_dir}/python/lib/python3.11/site-packages/
- cd ${local.layer_dir} && zip -r ${local.layer_zip} .
- EOT
- }
-}
-
-resource "aws_s3_object" "layer_zip" {
- bucket = aws_s3_bucket.lambda_code_bucket.bucket
- key = "layer.zip"
- source = local.layer_zip
- depends_on = [null_resource.prepare_layer]
-}
-
-resource "aws_lambda_layer_version" "lambda_layer" {
- layer_name = "lambda_layer"
- compatible_runtimes = ["python3.11"]
- s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket
- s3_key = aws_s3_object.layer_zip.key
- skip_destroy = true
- depends_on = [aws_s3_object.layer_zip]
-}
diff --git a/terraform/main.tf b/terraform/main.tf
index 310a251..6577b70 100644
--- a/terraform/main.tf
+++ b/terraform/main.tf
@@ -1,4 +1,5 @@
terraform {
+ required_version = ">= 1.8.0"
required_providers {
aws = {
source = "hashicorp/aws"
@@ -12,11 +13,16 @@ terraform {
source = "hashicorp/archive"
version = "~>2.5.0"
}
+ random = {
+ source = "hashicorp/random"
+ version = "~>3.6.2"
+ }
}
backend "s3" {
- bucket = "bentley-project-secrets"
- key = "bentley-project/terraform.tfstate"
- region = "eu-west-2"
+ bucket = "bentley-project-secrets"
+ key = "bentley-project/terraform.tfstate"
+ region = "eu-west-2"
+ encrypt = true
}
}
@@ -24,11 +30,11 @@ provider "aws" {
region = "eu-west-2"
default_tags {
tags = {
- ProjectName = "Terrific-Totes"
- Team = "Team-Bentley"
- Environment = "Dev"
- GitHubRepo = "de-project-bentley"
+ ProjectName = var.project_name
+ Environment = var.environment
ManagedBy = "Terraform"
+ GitHubRepo = var.github_repo
+ Team = var.team_name
}
}
}
diff --git a/terraform/rds.tf b/terraform/rds.tf
deleted file mode 100644
index a013fb3..0000000
--- a/terraform/rds.tf
+++ /dev/null
@@ -1,70 +0,0 @@
-# data "aws_availability_zones" "available" {}
-
-# module "vpc" {
-# source = "terraform-aws-modules/vpc/aws"
-# version = "5.12.1"
-
-# name = var.project_name
-# cidr = "10.0.0.0/16"
-# azs = data.aws_availability_zones.available.names
-# public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"]
-# enable_dns_hostnames = true
-# enable_dns_support = true
-# }
-
-# resource "aws_db_subnet_group" "Terrific-Totes-sub-gr" {
-# name = "tt-db-subnet"
-# subnet_ids = module.vpc.public_subnets
-
-# tags = {
-# Name = "${var.project_name}"
-# }
-# }
-
-# resource "aws_security_group" "rds" {
-# name = "${var.project_name}-rds"
-# vpc_id = module.vpc.vpc_id
-
-# ingress {
-# from_port = 5432
-# to_port = 5432
-# protocol = "tcp"
-# cidr_blocks = ["0.0.0.0/0"]
-# }
-
-# egress {
-# from_port = 5432
-# to_port = 5432
-# protocol = "tcp"
-# cidr_blocks = ["0.0.0.0/0"]
-# }
-
-# tags = {
-# Name = "${var.project_name}-rds"
-# }
-# }
-
-# resource "aws_db_parameter_group" "Terrific-Totes-param-gr" {
-# name = "tt-db-param"
-# family = "postgres14"
-
-# parameter {
-# name = "log_connections"
-# value = "1"
-# }
-# }
-
-# resource "aws_db_instance" "terrific-totes-rds" {
-# db_name = var.project_name
-# instance_class = "db.t3.micro"
-# allocated_storage = 5
-# engine = "postgres"
-# engine_version = "14.10"
-# username = ""
-# password = ""
-# db_subnet_group_name = aws_db_subnet_group.Terrific-Totes-sub-gr.name
-# vpc_security_group_ids = [aws_security_group.rds.id]
-# parameter_group_name = aws_db_parameter_group.Terrific-Totes-param-gr.name
-# publicly_accessible = false
-# skip_final_snapshot = true
-# }
diff --git a/terraform/s3.tf b/terraform/s3.tf
index d5cdee3..14e8835 100644
--- a/terraform/s3.tf
+++ b/terraform/s3.tf
@@ -1,14 +1,57 @@
-### EXTRACT BUCKET SET-UP
+########################
+# EXTRACT BUCKET SETUP #
+########################
+
resource "aws_s3_bucket" "extract_bucket" {
bucket_prefix = "${var.s3_extract_bucket_name}-"
+ force_destroy = true
+ tags = {
+ Name = "Ingestion Bucket"
+ }
+}
+
+resource "aws_s3_bucket_versioning" "extract_bucket_versioning" {
+ bucket = aws_s3_bucket.extract_bucket.id
+ versioning_configuration {
+ status = "Enabled"
+ }
}
-### TRANSFORM BUCKET SET-UP
+##########################
+# TRANSFORM BUCKET SETUP #
+##########################
+
resource "aws_s3_bucket" "transform_bucket" {
bucket_prefix = "${var.s3_transform_bucket_name}-"
+ force_destroy = true
+ tags = {
+ Name = "Transform Bucket"
+ }
+}
+
+
+resource "aws_s3_bucket_versioning" "transform_bucket_versioning" {
+ bucket = aws_s3_bucket.transform_bucket.id
+ versioning_configuration {
+ status = "Enabled"
+ }
}
-### LAMBDA BUCKET
+#######################
+# LAMBDA BUCKET SETUP #
+#######################
+
resource "aws_s3_bucket" "lambda_code_bucket" {
bucket_prefix = "${var.s3_code_bucket_name}-"
+ force_destroy = true
+ tags = {
+ Name = "Lambda Bucket"
+ }
+}
+
+resource "aws_s3_bucket_versioning" "lambda_bucket_versioning" {
+ bucket = aws_s3_bucket.lambda_code_bucket.id
+ versioning_configuration {
+ status = "Enabled"
+ }
}
diff --git a/terraform/vars.tf b/terraform/vars.tf
index 3c88731..b3e3e47 100644
--- a/terraform/vars.tf
+++ b/terraform/vars.tf
@@ -33,6 +33,21 @@ variable "project_name" {
default = "tt"
}
+variable "environment" {
+ type = string
+ default = "dev"
+}
+
+variable "github_repo" {
+ type = string
+ default = "de-project-bentley"
+}
+
+variable "team_name" {
+ type = string
+ default = "Team-Bentley"
+}
+
data "aws_caller_identity" "current" {}
data "aws_region" "current" {}
diff --git a/test.py b/test.py
deleted file mode 100644
index e69de29..0000000
--- a/test.py
+++ /dev/null
diff --git a/test/test_secrets_manager.py b/test/test_secrets_manager.py
deleted file mode 100644
index 86533bc..0000000
--- a/test/test_secrets_manager.py
+++ /dev/null
@@ -1,34 +0,0 @@
-from src.secrets_manager import sm_client, create_secret, list_secret
-import boto3
-from moto import mock_aws
-import json
-import pytest
-import os
-
-pytest.fixture(scope='class')
-def mock_aws_credentials():
- """Mocked AWS Credentials for moto."""
- os.environ["AWS_ACCESS_KEY_ID"] = "testing"
- os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
- os.environ["AWS_SECURITY_TOKEN"] = "testing"
- os.environ["AWS_SESSION_TOKEN"] = "testing"
- os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
-
-@pytest.fixture(scope='class')
-def mock_sm_client(mock_aws_credentials):
- with mock_aws():
- yield boto3.client('secretsmanager')
-
-
-def test_create_secret_stores_secrets(mock_sm_client):
- cohort_id = "test_cohort_id"
- user = "test_user_id"
- password = "test_password"
- host = "test_host"
- database = "test_database"
- port = "test_port"
-
- secret_name = "test_secret"
- response = create_secret(mock_sm_client, secret_name, cohort_id, user, password, host, database, port)
-
- assert response['Name'] == secret_name \ No newline at end of file
diff --git a/tests/dummy_identical.csv b/tests/dummy_identical.csv
index fdd8993..e44e9fc 100644
--- a/tests/dummy_identical.csv
+++ b/tests/dummy_identical.csv
@@ -1,4 +1,4 @@
-Food_type,Flavour,Colour
-Vegetable,Sour,Green
-Berry,Sweet,Red
+Food_type,Flavour,Colour,last_updated
+Vegetable,Sour,Green,2022-11-03 14:20:49.962
+Berry,Sweet,Red,2022-11-03 14:20:49.962
diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py
index e94a8a4..548ce67 100644
--- a/tests/test_extract_lambda.py
+++ b/tests/test_extract_lambda.py
@@ -1,13 +1,25 @@
+import boto3.exceptions
+import botocore.exceptions
import pytest
import boto3
from moto import mock_aws
from unittest.mock import patch, MagicMock
from unittest import TestCase
-from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables
-import os
+import os
import logging
-
-@pytest.fixture(scope='class')
+import json
+from src.extract_lambda import (
+ list_existing_s3_files,
+ connect_to_database,
+ DBConnectionException,
+ lambda_handler,
+ process_and_upload_tables,
+ retrieve_secrets,
+ extract_bucket,
+)
+
+
+@pytest.fixture(scope="class")
def mock_config():
env_vars = {
"host": "abc",
@@ -16,94 +28,220 @@ def mock_config():
"password": "password",
"database": "db",
}
- with patch("src.extract_lambda.get_config", return_value=env_vars) as mock_config:
+ with patch(
+ "src.extract_lambda.retrieve_secrets", return_value=env_vars
+ ) as mock_config:
yield mock_config
-@pytest.fixture(scope='class')
+@pytest.fixture(scope="class")
def aws_credentials():
- os.environ["AWS_ACCESS_KEY_ID"] = 'testing'
- os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing'
- os.environ["AWS_SECURIT_TOKEN"] = 'testing'
- os.environ["AWS_SESSION_TOKEN"] = 'testing'
- os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2'
+ os.environ["AWS_ACCESS_KEY_ID"] = "testing"
+ os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
+ os.environ["AWS_SECURITY_TOKEN"] = "testing"
+ os.environ["AWS_SESSION_TOKEN"] = "testing"
+ os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
+
-@pytest.fixture(scope='class')
+@pytest.fixture(scope="class")
def s3_client(aws_credentials):
with mock_aws():
- yield boto3.client('s3')
+ yield boto3.client("s3")
+
+
+@pytest.fixture(scope="class")
+def s3_mock_bucket(s3_client):
+ bucket = s3_client.create_bucket(
+ Bucket="extract_bucket",
+ CreateBucketConfiguration={"LocationConstraint": "eu-west-2"},
+ )
+ return bucket
+
+
+class TestLambdaHandler:
+ def test_files_processed_and_uploaded_successfully(self, mocker):
+ mock_db = MagicMock()
+ mock_db.run.side_effect = [
+ [["Fruits"]],
+ [["Vegetable", "Sour", "Green"], ["Berry", "Sweet", "Red"]],
+ [["Food_type"], ["Flavour"], ["Colour"]],
+ ]
+ mock_db.columns.return_value = [
+ {"name": "Food_type"},
+ {"name": "Flavour"},
+ {"name": "Colour"},
+ ]
+ with patch("src.extract_lambda.connect_to_database", return_value=mock_db):
+ mock_process_and_upload_tables = mocker.patch(
+ "src.extract_lambda.process_and_upload_tables",
+ return_value={
+ "updated": ["Fruits"],
+ "no change": ["Vegetable", "Berry"],
+ },
+ )
+ mock_list_existing_s3_files = mocker.patch(
+ "src.extract_lambda.list_existing_s3_files", return_value={}
+ )
+ event = {}
+ context = {}
+ response = lambda_handler(event, context)
+ assert response["statusCode"] == 200
+ assert json.loads(response["body"]) == (
+ "CSV files processed for Fruits and uploaded successfully."
+ "The following tables were not updated: Vegetable, Berry"
+ )
+ mock_list_existing_s3_files.assert_called_once()
+ mock_process_and_upload_tables.assert_called_once_with(mock_db, {})
+ mock_db.close.assert_called_once()
+
+ def test_no_changes_detected_no_files_uploaded(self, mocker):
+ mock_db = MagicMock()
+ mock_db.run.side_effect = [
+ [["Fruits"]],
+ [["Vegetable", "Sour", "Green"], ["Berry", "Sweet", "Red"]],
+ [["Food_type"], ["Flavour"], ["Colour"]],
+ ]
+ mock_db.columns.return_value = [
+ {"name": "Food_type"},
+ {"name": "Flavour"},
+ {"name": "Colour"},
+ ]
+
+ with patch("src.extract_lambda.connect_to_database", return_value=mock_db):
+ mock_process_and_upload_tables = mocker.patch(
+ "src.extract_lambda.process_and_upload_tables",
+ return_value={"updated": [], "no change": ["Fruits"]},
+ )
+ mock_list_existing_s3_files = mocker.patch(
+ "src.extract_lambda.list_existing_s3_files", return_value={}
+ )
+ event = {}
+ context = {}
+ response = lambda_handler(event, context)
+ assert response["statusCode"] == 200
+ assert (
+ json.loads(response["body"])
+ == "No changes detected, no CSV files were uploaded."
+ )
+ mock_list_existing_s3_files.assert_called_once()
+ mock_process_and_upload_tables.assert_called_once_with(mock_db, {})
+ mock_db.close.assert_called_once()
+
+ def test_exception_error(self, mocker):
+ with patch(
+ "src.extract_lambda.connect_to_database",
+ side_effect=Exception("Database connection error"),
+ ):
+ mock_process_and_upload_tables = mocker.patch(
+ "src.extract_lambda.process_and_upload_tables"
+ )
+ mock_list_existing_s3_files = mocker.patch(
+ "src.extract_lambda.list_existing_s3_files"
+ )
+ event = {}
+ context = {}
+ response = lambda_handler(event, context)
+ assert response["statusCode"] == 500
+ assert json.loads(response["body"]) == "Internal server error."
+ mock_list_existing_s3_files.assert_not_called()
+ mock_process_and_upload_tables.assert_not_called()
+
+
+class TestExtractBucket:
+ def test_extract_bucket_returns_bucket_name(self, s3_client, s3_mock_bucket):
+ result = extract_bucket(s3_client)
+ assert result == "extract_bucket"
+
+ def test_bucket_returns_first_bucket(self, s3_client):
+ bucket1 = s3_client.create_bucket(
+ Bucket="bucket1",
+ CreateBucketConfiguration={"LocationConstraint": "eu-west-2"},
+ )
+ result = extract_bucket(s3_client)
+ assert result == "extract_bucket"
+
+ def test_returns_index_error_if_no_buckets(self, s3_client):
+ s3_client.delete_bucket(Bucket="extract_bucket")
+ s3_client.delete_bucket(Bucket="bucket1")
+
+ with pytest.raises(IndexError, match="list index out of range"):
+ extract_bucket(s3_client)
+
class TestListExistingS3Files:
def test_error_if_no_bucket(self, s3_client, caplog):
-
logger = logging.getLogger()
- logger.info('Testing now.')
+ logger.info("Testing now.")
caplog.set_level(logging.ERROR)
list_existing_s3_files(client=s3_client)
- assert 'Error listing S3 objects' in caplog.text
-
- def test_error_if_bucket_is_empty(self, s3_client, caplog):
-
- s3_client.create_bucket(Bucket='extract_bucket',
- CreateBucketConfiguration={
- 'LocationConstraint': 'eu-west-2'
- })
- list_existing_s3_files(client=s3_client)
- assert 'The bucket is empty' in caplog.text
-
- def test_error_retrieving_object(self, s3_client, caplog):
- s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt')
- list_existing_s3_files(bucket_name='test_bucket', client=s3_client)
+ assert "Error listing S3 objects" in caplog.text
- assert 'Error retrieving S3 object ' in caplog.text
+ def test_error_if_bucket_is_empty(self, s3_client, caplog, s3_mock_bucket):
+ list_existing_s3_files("extract_bucket", client=s3_client)
+ assert "The bucket is empty" in caplog.text
- def test_retrieves_file_content(self, s3_client, caplog):
- result = list_existing_s3_files(client=s3_client)
+ def test_retrieves_file_content(self, s3_client, caplog, s3_mock_bucket):
+ s3_client.upload_file("tests/dummy.txt", "extract_bucket", "dummy.txt")
+ result = list_existing_s3_files("extract_bucket", client=s3_client)
+ assert list(result.values()) == ["This is a test file."]
- assert list(result.values()) == ['This is a test file.']
class TestConnectToDatabase:
def test_connect_to_database(mock_conn, mock_config):
- with patch("src.extract_lambda.Connection", autospec=True) as mock_conn:
+ with patch("src.extract_lambda.Connection", autospec=True) as mock_conn:
connect_to_database()
mock_conn.assert_called_with(
- host="abc", user="def", port="5432", password="password", database="db"
+ host="abc", user="def", port="5432", password="password", database="db"
)
- def test_database_error(self, mock_config):
+ def test_database_error(self, mock_config): # had mock_config in param
with pytest.raises(DBConnectionException):
connect_to_database()
def test_logs_interface_error(self, caplog):
logger = logging.getLogger()
- logger.info('Testing now.')
+ logger.info("Testing now.")
caplog.set_level(logging.ERROR)
with pytest.raises(DBConnectionException):
connect_to_database()
- assert 'Interface error' in caplog.text
-'''
+ assert "Interface error" in caplog.text
+
+
class TestProcessAndUploadTables:
- def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog):
- logger = logging.getLogger()
- logger.info('Testing now.')
- caplog.set_level(logging.ERROR)
- ####
- queries = ["SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';",
- "SELECT * FROM Fruits;",
- "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits'"]
- return_values = [[['Fruits']],
- [['Vegetable','Sour','Green'],['Berry','Sweet','Red']],
- [['Food_type'],['Flavour'],['Colour']]]
- vals = dict(zip(queries,return_values))
-
- ####
- with patch('src.extract_lambda.connect_to_database') as mock_db:
- mock_db().run.side_effects = return_values
- s3_key = 'Fruits/2024/08/15/Fruits_16:46:30.csv'
- existing_files = {s3_key: 'Food_type,Flavour,Colour\nFruit,Sour,Green\nBerry,Sweet,Red'}
- s3_client.create_bucket(Bucket='extract_bucket',
- CreateBucketConfiguration={'LocationConstraint': 'eu-west-2'})
- s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key)
+ def test_error_process_and_upload_tables(self, mock_conn, s3_client, caplog):
+ caplog.set_level(logging.INFO)
+
+ # Mock return values for database queries
+ queries = [
+ "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';",
+ "SELECT * FROM Fruits WHERE last_updated > :latest;",
+ "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits';",
+ ]
+ return_values = [
+ [["Fruits"]],
+ [], # No new rows with a more recent last_updated timestamp
+ [["Food_type"], ["Flavour"], ["Colour"], ["last_updated"]],
+ ]
+ vals = dict(zip(queries, return_values))
+
+ # Patch the database connection and set return values for queries
+ with patch("src.extract_lambda.Connection") as mock_db:
+ mock_db().run.side_effect = return_values
+ s3_key = "Fruits/2024/08/15/Fruits_16:46:30.csv"
+ existing_files = {
+ s3_key: "Food_type,Flavour,Colour,last_updated\nVegetable,Sour,Green,2022-11-03 14:20:49.962\nBerry,Sweet,Red,2022-11-03 14:20:49.962"
+ }
+
+ # Simulate S3 bucket and file setup
+ s3_client.create_bucket(
+ Bucket="test_extract_bucket",
+ CreateBucketConfiguration={"LocationConstraint": "eu-west-2"},
+ )
+ s3_client.upload_file(
+ "tests/dummy_identical.csv", "test_extract_bucket", s3_key
+ )
+
+ # Run the process_and_upload_tables function
process_and_upload_tables(mock_db(), existing_files, client=s3_client)
- assert 'No new data.' in caplog.text
-''' \ No newline at end of file
+ # Assert that the log contains "No new data"
+ assert "No new data" in caplog.text
diff --git a/tests/test_secrets_manager.py b/tests/test_secrets_manager.py
index a30be86..609c572 100644
--- a/tests/test_secrets_manager.py
+++ b/tests/test_secrets_manager.py
@@ -3,10 +3,11 @@ import boto3
import botocore.exceptions
from moto import mock_aws
import json
-import pytest
+import pytest
import os
-@pytest.fixture(scope='function')
+
+@pytest.fixture(scope="function")
def aws_credentials():
"""Mocked AWS Credentials for moto."""
os.environ["AWS_ACCESS_KEY_ID"] = "testing"
@@ -15,12 +16,14 @@ def aws_credentials():
os.environ["AWS_SESSION_TOKEN"] = "testing"
os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
-@pytest.fixture(scope='function')
+
+@pytest.fixture(scope="function")
def mock_sm_client(aws_credentials):
with mock_aws():
yield boto3.client("secretsmanager")
-@pytest.fixture(scope='function')
+
+@pytest.fixture(scope="function")
def mock_store_secret(mock_sm_client):
secret = {
"cohort_id": "test_cohort_id",
@@ -28,15 +31,18 @@ def mock_store_secret(mock_sm_client):
"password": "test_password",
"host": "test_host",
"database": "test_database",
- "port": "test_port"
+ "port": "test_port",
}
secret_name = "test_secret"
- response = mock_sm_client.create_secret(Name=secret_name, SecretString=json.dumps(secret))
+ response = mock_sm_client.create_secret(
+ Name=secret_name, SecretString=json.dumps(secret)
+ )
return response
+
def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret):
secret_name = "test_secret"
@@ -44,8 +50,10 @@ def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret)
assert isinstance(result, dict)
-def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_store_secret):
+def test_retrieves_secrets_returns_correct_keys_and_values(
+ mock_sm_client, mock_store_secret
+):
secret_name = "test_secret"
result = retrieve_secrets(mock_sm_client, secret_name)
@@ -57,17 +65,20 @@ def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_
assert result["database"] == "test_database"
assert result["port"] == "test_port"
-def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type(mock_sm_client):
- secret_name = [1, 2, 3]
+def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type(
+ mock_sm_client,
+):
+ secret_name = [1, 2, 3]
with pytest.raises(botocore.exceptions.ParamValidationError) as error:
retrieve_secrets(mock_sm_client, secret_name)
-def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist(mock_sm_client, mock_store_secret):
- secret_name = 'test_secret_2'
-
+def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist(
+ mock_sm_client, mock_store_secret
+):
+ secret_name = "test_secret_2"
with pytest.raises(botocore.exceptions.ClientError) as error:
- retrieve_secrets(mock_sm_client, secret_name) \ No newline at end of file
+ retrieve_secrets(mock_sm_client, secret_name)
git.ajschof.me — hosted by ajschofield — powered by cgit