diff options
| author | bulve-ad <78788030+bulve-ad@users.noreply.github.com> | 2024-08-21 15:51:03 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-08-21 15:51:03 +0100 |
| commit | ce76bbb2b32b58a93d88db4abdb1bbfbf27243ea (patch) | |
| tree | b8e77c62b6a2d50ab04215beb54055d14210a423 | |
| parent | c8e94530b65d6807b2b9bb246a542963839cce9d (diff) | |
| parent | d01d3bed939d7a17ea2205af502baeeb35510b5c (diff) | |
| download | de-project-bentley-ce76bbb2b32b58a93d88db4abdb1bbfbf27243ea.tar.gz de-project-bentley-ce76bbb2b32b58a93d88db4abdb1bbfbf27243ea.zip | |
Merge branch 'development' into feature/transform_lambda
| -rw-r--r-- | .deepsource.toml | 6 | ||||
| -rw-r--r-- | .github/workflows/deploy.yml | 42 | ||||
| -rw-r--r-- | .github/workflows/dev-tests.yml | 48 | ||||
| -rw-r--r-- | .gitignore | 3 | ||||
| -rw-r--r-- | Makefile | 80 | ||||
| -rw-r--r-- | README.md | 12 | ||||
| -rwxr-xr-x | scripts/deploy.sh | 56 | ||||
| -rwxr-xr-x | scripts/make_layer_zip.sh | 8 | ||||
| -rw-r--r-- | src/extract_lambda.py | 131 | ||||
| -rw-r--r-- | src/load_lambda.py | 2 | ||||
| -rw-r--r-- | src/secrets_manager.py | 48 | ||||
| -rw-r--r-- | src/transform_lambda.py | 1 | ||||
| -rw-r--r-- | terraform/events.tf | 44 | ||||
| -rw-r--r-- | terraform/iam.tf | 48 | ||||
| -rw-r--r-- | terraform/lambda.tf | 141 | ||||
| -rw-r--r-- | terraform/main.tf | 20 | ||||
| -rw-r--r-- | terraform/rds.tf | 70 | ||||
| -rw-r--r-- | terraform/s3.tf | 49 | ||||
| -rw-r--r-- | terraform/vars.tf | 15 | ||||
| -rw-r--r-- | test.py | 0 | ||||
| -rw-r--r-- | test/test_secrets_manager.py | 34 | ||||
| -rw-r--r-- | tests/dummy_identical.csv | 6 | ||||
| -rw-r--r-- | tests/test_extract_lambda.py | 262 | ||||
| -rw-r--r-- | tests/test_secrets_manager.py | 37 |
24 files changed, 676 insertions, 487 deletions
diff --git a/.deepsource.toml b/.deepsource.toml index a840b78..42d0973 100644 --- a/.deepsource.toml +++ b/.deepsource.toml @@ -1,5 +1,7 @@ version = 1 +test_patterns = ["tests/**"] + [[analyzers]] name = "sql" @@ -22,6 +24,4 @@ name = "black" name = "autopep8" [[transformers]] -name = "ruff" - - +name = "ruff"
\ No newline at end of file diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml deleted file mode 100644 index 5672048..0000000 --- a/.github/workflows/deploy.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: deploy-terraform - -on: - pull_request: - branches: - - main - push: - branches: - - main - - -jobs: - deploy-terraform: - name: Deploy Terraform - runs-on: ubuntu-latest - #needs: run-checks (must ref on-commit.yml file) - environment: production - steps: - - name: Checkout Repo - uses: actions/checkout@v4 - - - name: Install Terraform - uses: hashicorp/setup-terraform@v3 - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ secrets.AWS_REGION }} - - - name: Terraform Init - working-directory: terraform - run: terraform init - - - name: Terraform Plan - working-directory: terraform - run: terraform plan - - - name: Terraform Apply - working-directory: terraform - run: terraform apply --auto-approve diff --git a/.github/workflows/dev-tests.yml b/.github/workflows/dev-tests.yml new file mode 100644 index 0000000..d66f1c6 --- /dev/null +++ b/.github/workflows/dev-tests.yml @@ -0,0 +1,48 @@ +name: dev-tests + +on: + pull_request: + branches: + - development + push: + branches: + - development + +jobs: + validate-and-test: + name: Validate Terraform and Run Tests + runs-on: ubuntu-latest + steps: + - name: Checkout Repo + uses: actions/checkout@v4 + + - name: Install Terraform + uses: hashicorp/setup-terraform@v3 + + - name: Terraform Init + working-directory: terraform + run: terraform init -backend=false + + - name: Terraform Validate + working-directory: terraform + run: terraform validate + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install pytest pytest-testdox + pip install -r requirements.txt + + - name: Run pytest + run: pytest tests/ -vvrP --testdox + continue-on-error: true + id: pytest + + - name: Check on failures + if: steps.pytest.outcome == 'failure' + run: exit 1 @@ -14,5 +14,4 @@ __pycache__/ # OS-Related Files .DS_Store - -*venv* +venv
\ No newline at end of file diff --git a/Makefile b/Makefile deleted file mode 100644 index 077cd98..0000000 --- a/Makefile +++ /dev/null @@ -1,80 +0,0 @@ -############################################## -# # -# MAKEFILE TO BUILD THE PROJECT # -# # -############################################## - -PROJECT_NAME = de-project-bentley -REGION = eu-west-2 -PYTHON_INTERPRETER = python -WD=$(shell pwd) -PYTHONPATH=${WD} -SHELL := /bin/bash -PROFILE = default -PIP:=pip - -## PYTHON INTERPRETER ENVIRONMENT -create-environment: - @echo ">>> About to create environment: $(PROJECT_NAME)..." - @echo ">>> check python3 version" - ( \ - $(PYTHON_INTERPRETER) --version; \ - ) - @echo ">>> Setting up VirtualEnv." - ( \ - $(PIP) install -q virtualenv virtualenvwrapper; \ - virtualenv venv --python=$(PYTHON_INTERPRETER); \ - ) - -ACTIVATE_ENV := source venv/bin/activate - -# Execute python related functionalities from within the project's environment -define execute_in_env - $(ACTIVATE_ENV) && $1 -endef - -## Build the environment requirements -requirements: create-environment - $(call execute_in_env, $(PIP) install -r ./requirements.txt) - -# Set Up -## Install bandit -bandit: - $(call execute_in_env, $(PIP) install bandit) - -## Install safety -safety: - $(call execute_in_env, $(PIP) install safety) - -## Install black -black: - $(call execute_in_env, $(PIP) install black) - -## Install coverage -coverage: - $(call execute_in_env, $(PIP) install coverage) - -## Set up dev requirements (bandit, safety, black) -dev-setup: bandit safety black coverage - -# Build / Run - -## Run the security test (bandit + safety) -security-test: - $(call execute_in_env, safety check -r ./requirements.txt) - $(call execute_in_env, bandit -lll */*.py *c/*/*.py) - -## Run the black code check -run-black: - $(call execute_in_env, black ./src/*/*.py ./test/*/*.py) - -## Run the unit tests -unit-test: - $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest -v) - -## Run the coverage check -check-coverage: - $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest --cov=src test/) - -## Run all checks -run-checks: security-test run-black unit-test check-coverage @@ -1,5 +1,13 @@ # ToteSys - Data Engineering Project +[](https://www.python.org/) +[](https://aws.amazon.com/) +[](https://www.terraform.io/) +[](https://www.postgresql.org/) +[](https://github.com/features/actions) + +[](https://github.com/ajschofield/de-project-bentley/actions/workflows/deploy.yml?query=branch%3Amain) +[](https://github.com/ajschofield/de-project-bentley/deployments/production) # Summary The project aims to implement a data platform that can extract data from an operational database, archive it in a data lake, and make it easily accessible @@ -13,7 +21,7 @@ The solution showcases our skills in: - Amazon Web Services (AWS) - Agile methodologies -# Main Objective +# Main Objectives Our goal is to create a reliable ETL (Extract, Transform, Load) pipeline that can: @@ -40,4 +48,4 @@ others. TBA # Contributors -TBA
\ No newline at end of file +TBA diff --git a/scripts/deploy.sh b/scripts/deploy.sh new file mode 100755 index 0000000..f631bbc --- /dev/null +++ b/scripts/deploy.sh @@ -0,0 +1,56 @@ +# Deploy Script +# Description: Deploy and destroy Terraform +# WARNING: This will most likely destroy any current infrastructure if protections +# are not in place. Be careful! + +# Exit if any command has a non-zero status +set -e + +# Change current directory to terraform folder at the start +cd ../terraform/ + +echo "WARNING: This script will destroy any infrastructure for testing." +echo "It should not be used once a proper deployment has been setup." +echo "Would you like to continue?" + +select yn in "Yes" "No"; do + case $yn in + Yes) + echo "Would you like to destroy the current infrastructure?" + select destroy_1 in "Yes" "No"; do + case $destroy_1 in + Yes) + terraform destroy + break + ;; + No) + echo "Skipping initial destroy..." + break + ;; + esac + done + + terraform apply + + echo "Would you like to destroy the newly-created infrastructure?" + select destroy_2 in "Yes" "No"; do + case $destroy_2 in + Yes) + terraform destroy + break + ;; + No) + echo "Skipping final destroy... Infrastructure will remain." + break + ;; + esac + done + + break + ;; + No) + echo "Operation cancelled..." + exit + ;; + esac +done diff --git a/scripts/make_layer_zip.sh b/scripts/make_layer_zip.sh new file mode 100755 index 0000000..eabe301 --- /dev/null +++ b/scripts/make_layer_zip.sh @@ -0,0 +1,8 @@ +# Description: Make the zip file for the layer + +cd "$(dirname "$0")/.." +mkdir -p python/lib/python3.11/site-packages +pip3 install --upgrade -r requirements.txt -t python/lib/python3.11/site-packages +rm layer.zip +zip -r layer.zip python +rm -r python/ diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 4168e27..24f0981 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -1,17 +1,24 @@ -from pg8000.native import Connection, InterfaceError, identifier -import boto3 import csv -from botocore.exceptions import ClientError -import logging import json -from datetime import datetime +import logging import re +from datetime import datetime +from io import StringIO + +import boto3 +from botocore.exceptions import ClientError +from pg8000.native import Connection, InterfaceError, identifier +logger = logging.getLogger(__name__) -logger = logging.getLogger() -logger.setLevel(logging.INFO) +logging.basicConfig( + format="{asctime} - {levelname} - {message}", + style="{", + datefmt="%Y-%m-%d %H:%M", + level=logging.DEBUG, +) -# DB Exception class +logging.getLogger("botocore").setLevel(logging.WARNING) class DBConnectionException(Exception): @@ -28,6 +35,7 @@ def lambda_handler(event, context): and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them it uses 3 helper functions to achieve these 3 functionalities """ + db = None try: db = connect_to_database() existing_files = list_existing_s3_files() @@ -39,38 +47,44 @@ def lambda_handler(event, context): "statusCode": 200, "body": json.dumps("No changes detected, no CSV files were uploaded."), } - else: - return { - "statusCode": 200, - "body": json.dumps( - f"""CSV files processed for {', '.join(any_changes['updated'])} and uploaded successfully.{ - 'The following tables were not updated: '+', '.join(any_changes['no change']) if any_changes['no change'] else ''}""" - ), - } + return { + "statusCode": 200, + "body": json.dumps( + f"""CSV files processed for {', '.join(any_changes['updated'])} and uploaded successfully.{ + 'The following tables were not updated: '+', '.join(any_changes['no change']) if any_changes['no change'] else ''}""" + ), + } except Exception as e: - logger.error(f"Error: {e}") + logger.error(f"Error: {e}", exc_info=True) return {"statusCode": 500, "body": json.dumps("Internal server error.")} finally: if db: db.close() -def retrieve_secrets( - sm_client=boto3.client("secretsmanager"), secret_name="bentley-secrets" -): +def retrieve_secrets(): + secret_name = "bentley-secrets" + region_name = "eu-west-2" + + # Create a Secrets Manager client + session = boto3.session.Session() + client = session.client(service_name="secretsmanager", region_name=region_name) + try: - response = sm_client.get_secret_value(SecretId=secret_name) - if "SecretString" in response: - secret = json.loads(response["SecretString"]) - return secret + get_secret_value_response = client.get_secret_value(SecretId=secret_name) except ClientError as e: - logger.error(f"Could not retrieve secrets: {e}") + logger.error(f"Failed to retrieve secret {secret_name}: {str(e)}") raise e + except KeyError: + logger.error(f"Secret {secret_name} does not contain a SecretString") + raise ValueError(f"Secret {secret_name} does not contain a SecretString") + + return get_secret_value_response["SecretString"] def connect_to_database() -> Connection: try: - secrets = retrieve_secrets() + secrets = json.loads(retrieve_secrets()) host = secrets["host"] port = secrets["port"] user = secrets["user"] @@ -90,6 +104,7 @@ def extract_bucket(client=boto3.client("s3")): extract_bucket_filter = [ bucket["Name"] for bucket in response["Buckets"] if "extract" in bucket["Name"] ] + return extract_bucket_filter[0] @@ -98,7 +113,7 @@ def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client("s3 results of listing the contents of the s3 bucket, then returns the populated dictionary """ - + logging.info("Listing existing S3 files") existing_files = {} try: @@ -115,6 +130,7 @@ def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client("s3 logger.error(f"Error retrieving S3 object {s3_key}: {e}") else: logger.error("The bucket is empty") + return None except ClientError as e: logger.error(f"Error listing S3 objects: {e}") @@ -122,6 +138,21 @@ def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client("s3 return existing_files +def get_latest_timestamp(existing_files): + if existing_files: + all_datetimes = [] + for file_name in existing_files.keys(): + match = re.search(r"\/(.+/).+_(.+)\.csv", file_name) + if match: + datetime_str = "".join(match.group(1, 2)) + all_datetimes.append( + datetime.strptime(datetime_str, "%Y/%m/%d/%H:%M:%S") + ) + return max(all_datetimes) if all_datetimes else datetime.min + + return existing_files + + def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): """Creates a list of the tables from a database query and then selects everything from each table in individual queries @@ -130,29 +161,35 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): to files, or new tables/files it uploads them to the s3 bucket """ load_status = {"updated": [], "no change": []} - # Retrieving the latest file timestamp from S3 extract bucket - all_datetimes = [] - for file_names in existing_files.keys(): - datetime_str_on_s3 = "".join( - re.search(r"\/(.+/).+_(.+)\.csv", file_names).group(1, 2) - ) - all_datetimes.append(datetime.strptime(datetime_str_on_s3, "%Y/%m/%d/%H:%M:%S")) - latest_timestamp = max(all_datetimes) + latest_timestamp = get_latest_timestamp(existing_files) - # Iterating through tables on the database and retrieving only latest changes vs previous file load tables = db.run( """ - SELECT table_name - FROM information_schema.tables - WHERE table_schema='public' AND table_type='BASE TABLE';""" + SELECT table_name + FROM information_schema.tables + WHERE table_schema='public' AND table_name != '_prisma_migrations' + AND table_type='BASE TABLE'; + """ ) + for table in tables: table_name = table[0] - rows = db.run( - f"SELECT * FROM {identifier(table_name)} " "WHERE last_updated >= :latest;", - latest={datetime.strftime(latest_timestamp, "%H-%m-%d %H:%M:%S")}, + base_query = f""" + SELECT * FROM {identifier(table_name)} + WHERE last_updated >= :latest; + """ + latest = ( + { + datetime.strftime( + latest_timestamp if latest_timestamp else datetime(1990, 1, 1), + "%Y-%m-%d %H:%M:%S", + ) + }, ) - + logger.info(f"Processing table: {table_name}") + logger.info(f"Latest timestamp: {latest[0]}") + rows = db.run(base_query, latest=latest) + logger.debug(f"Rows: {rows}") # Creating a temporary file path and writing the column name to it followed by each row of data if rows: csv_file_path = f"/tmp/{table_name}.csv" @@ -182,7 +219,9 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): logger.error(f"Error uploading to S3: {e}") else: load_status["no change"].append(table_name) - logger.info( - f"No new data in {table_name} name. Latest data retrieved is from {latest_timestamp}." - ) + logger.info(f"No new data") return load_status + + +if __name__ == "__main__": + lambda_handler(None, None) diff --git a/src/load_lambda.py b/src/load_lambda.py index 6ee681f..c6a8e60 100644 --- a/src/load_lambda.py +++ b/src/load_lambda.py @@ -1,2 +1,2 @@ def lambda_handler(): - pass
\ No newline at end of file + pass diff --git a/src/secrets_manager.py b/src/secrets_manager.py deleted file mode 100644 index c0fb61e..0000000 --- a/src/secrets_manager.py +++ /dev/null @@ -1,48 +0,0 @@ -import boto3 -from botocore.exceptions import ClientError -import json - - -def sm_client(): - sm_client = boto3.client('secretsmanager') - yield sm_client - -def create_secret(sm_client, secret_name, cohort_id, user, password, host, database, port): - secret = { - "cohort_id": cohort_id, - "user": user, - "password": password, - "host": host, - "database": database, - "port": port - } - - response = sm_client.create_secret( - Name = secret_name, - SecretString = json.dumps(secret) - ) - - print(response) - return response - -def list_secret(sm_client): - response = sm_client.list_secrets() - secret_dict = response['SecretList'] - secret_names = [] - for items in secret_dict: - secret_names.append(items['Name']) - print(f'{len(secret_names)} secret(s) available') - for name in secret_names: - print(name) - return secret_names - -def retrieve_secrets(sm_client): - response = sm_client.get_secrets( - - ) - - - -#retrieve secret -#so lambda can access totesy db -#so lambda connect to the db and then retrieve the data
\ No newline at end of file diff --git a/src/transform_lambda.py b/src/transform_lambda.py index b176ccc..9238180 100644 --- a/src/transform_lambda.py +++ b/src/transform_lambda.py @@ -1,4 +1,3 @@ -# from src.extract_lambda import extract_bucket import json import boto3 import re diff --git a/terraform/events.tf b/terraform/events.tf index 263141f..53ae10a 100644 --- a/terraform/events.tf +++ b/terraform/events.tf @@ -1,3 +1,7 @@ +################# +# Random String # +################# + resource "random_string" "eventbridge_suffix" { length = 8 special = false @@ -16,6 +20,10 @@ resource "random_string" "s3_transform_suffix" { upper = false } +############################# +# EventBridge Configuration # +############################# + resource "aws_cloudwatch_event_rule" "lambda_trigger" { name = "lambda-scheduled-trigger" description = "Schedule to trigger the Lambda function" @@ -25,7 +33,7 @@ resource "aws_cloudwatch_event_rule" "lambda_trigger" { resource "aws_cloudwatch_event_target" "extract_lambda_cw_event" { rule = aws_cloudwatch_event_rule.lambda_trigger.name target_id = "TargetFunctionV1" - arn = aws_lambda_function.extract_lambda.arn #replaced lambda name placeholder + arn = aws_lambda_function.extract_lambda.arn depends_on = [aws_lambda_permission.allow_eventbridge] } @@ -37,54 +45,64 @@ resource "aws_lambda_permission" "allow_eventbridge" { source_arn = aws_cloudwatch_event_rule.lambda_trigger.arn lifecycle { - replace_triggered_by = [random_string.eventbridge_suffix] + create_before_destroy = true + replace_triggered_by = [random_string.eventbridge_suffix] } } -# below is step function 1 +######################################## +# S3 Extract Bucket Notification Setup # +######################################## + resource "aws_lambda_permission" "allow_s3_ingestion" { statement_id = "AllowS3InvokeLambdaTransform${random_string.s3_ingestion_suffix.result}" action = "lambda:InvokeFunction" - function_name = aws_lambda_function.transform_lambda.function_name #replaced lambda name placeholder + function_name = aws_lambda_function.transform_lambda.function_name principal = "s3.amazonaws.com" - source_arn = aws_s3_bucket.extract_bucket.arn #replaced bucket name placeholder + source_arn = aws_s3_bucket.extract_bucket.arn lifecycle { - replace_triggered_by = [random_string.s3_ingestion_suffix] + create_before_destroy = true + replace_triggered_by = [random_string.s3_ingestion_suffix] } } resource "aws_s3_bucket_notification" "extract_bucket_notification" { - bucket = aws_s3_bucket.extract_bucket.id #replaced bucket name placeholder + bucket = aws_s3_bucket.extract_bucket.id lambda_function { events = ["s3:ObjectCreated:*"] - lambda_function_arn = aws_lambda_function.transform_lambda.arn #replaced lambda name placeholder + lambda_function_arn = aws_lambda_function.transform_lambda.arn } depends_on = [aws_lambda_permission.allow_s3_ingestion] } +########################################## +# S3 Transform Bucket Notification Setup # +########################################## + resource "aws_lambda_permission" "allow_s3_transform_bucket" { statement_id = "AllowS3InvokeLambdaTransform${random_string.s3_transform_suffix.result}" action = "lambda:InvokeFunction" - function_name = aws_lambda_function.transform_lambda.function_name #replaced lambda name placeholder + function_name = aws_lambda_function.transform_lambda.function_name principal = "s3.amazonaws.com" - source_arn = aws_s3_bucket.transform_bucket.arn #replaced bucket name placeholder + source_arn = aws_s3_bucket.transform_bucket.arn lifecycle { - replace_triggered_by = [random_string.s3_transform_suffix] + create_before_destroy = true + replace_triggered_by = [random_string.s3_transform_suffix] } } resource "aws_s3_bucket_notification" "transform_bucket_notification" { - bucket = aws_s3_bucket.transform_bucket.id #replaced bucket name placeholder + bucket = aws_s3_bucket.transform_bucket.id lambda_function { events = ["s3:ObjectCreated:*"] - lambda_function_arn = aws_lambda_function.transform_lambda.arn #replaced lambda name placeholder + lambda_function_arn = aws_lambda_function.transform_lambda.arn } depends_on = [aws_lambda_permission.allow_s3_transform_bucket] diff --git a/terraform/iam.tf b/terraform/iam.tf index 0e5fa6d..3d62b69 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -28,17 +28,21 @@ resource "aws_iam_role" "multi_service_role" { ######################################################################## # S3 SETUP # Description: allows allows retention/tagging/access control settings -# Lambda IAM Policy for S3 Write +# Lambda IAM Policy for S3 ######################################################################## # S3 DEFINE POLICY data "aws_iam_policy_document" "s3_data_policy_doc" { statement { + effect = "Allow" actions = [ "s3:PutObject", "s3:PutObjectRetention", "s3:PutObjectTagging", - "s3:PutObjectAcl" + "s3:PutObjectAcl", + "s3:ListObjects", + "s3:ListObjectsV2", + "s3:GetObject" ] resources = [ "${aws_s3_bucket.extract_bucket.arn}/*", @@ -46,6 +50,19 @@ data "aws_iam_policy_document" "s3_data_policy_doc" { "${aws_s3_bucket.lambda_code_bucket.arn}/*", ] } + + statement { + effect = "Allow" + actions = [ + "s3:ListBucket", + "s3:ListAllMyBuckets", + "s3:ListObjectsV2", + "s3:ListObjects" + ] + resources = [ + "arn:aws:s3:::*", + ] + } } @@ -156,3 +173,30 @@ resource "aws_iam_role_policy_attachment" "cloudwatch_events_attachment" { role = aws_iam_role.multi_service_role.name policy_arn = aws_iam_policy.cloudwatch_events_policy.arn } + +######################### +# SECRETS MANAGER SETUP # +######################### + +# Policy Doc +data "aws_iam_policy_document" "secrets_manager_policy_doc" { + statement { + effect = "Allow" + actions = [ + "secretsmanager:GetSecretValue" + ] + resources = ["arn:aws:secretsmanager:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:secret:bentley-secrets-Na0yc8"] + } +} + +# SM Policy Resource +resource "aws_iam_policy" "secrets_manager_policy" { + name = "secrets_manager_policy" + policy = data.aws_iam_policy_document.secrets_manager_policy_doc.json +} + +# Attach SM Policy to Role +resource "aws_iam_role_policy_attachment" "secrets_manager_attachment" { + role = aws_iam_role.multi_service_role.name + policy_arn = aws_iam_policy.secrets_manager_policy.arn +} diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 67fd6eb..d33a6c9 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -1,4 +1,54 @@ -# Extract Lambda Function +#################### +# Common Variables # +#################### + +locals { + layer_dir = "../" + layer_zip = "layer.zip" + layer_name = "lambda_layer" + script_dir = "../scripts" + layer_zip_path = "${local.layer_dir}/${local.layer_zip}" +} + +###################### +# Lambda Layer Setup # +###################### + +resource "null_resource" "prepare_layer" { + + # New change: only run the script if the layer zip does not exist + + triggers = { + layer_zip_exists = fileexists(local.layer_zip_path) ? "exists" : "not_exists" + } + + provisioner "local-exec" { + command = "if [ ! -f ${local.layer_zip_path} ]; then bash ${local.script_dir}/make_layer_zip.sh; fi" + } +} + +resource "aws_s3_object" "lambda_layer_zip" { + bucket = aws_s3_bucket.lambda_code_bucket.id #bucket instead of id + key = "${local.layer_name}/${local.layer_zip}" + source = "${local.layer_dir}/${local.layer_zip}" + depends_on = [null_resource.prepare_layer] + etag = fileexists(local.layer_zip_path) ? filemd5(local.layer_zip_path) : null +} + +resource "aws_lambda_layer_version" "lambda_layer" { + layer_name = local.layer_name + compatible_runtimes = ["python3.11"] + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.lambda_layer_zip.key + source_code_hash = fileexists(local.layer_zip_path) ? filebase64sha256(local.layer_zip_path) : null + skip_destroy = true + depends_on = [aws_s3_object.lambda_layer_zip] +} + +########################### +# Extract Lambda Function # +########################### + data "archive_file" "extract_lambda_zip" { type = "zip" source_file = "${path.module}/../src/extract_lambda.py" @@ -12,12 +62,15 @@ resource "aws_s3_object" "extract_lambda_code" { } resource "aws_lambda_function" "extract_lambda" { - function_name = var.extract_lambda_name - s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = aws_s3_object.extract_lambda_code.key - role = aws_iam_role.multi_service_role.arn - handler = "extract_lambda.extract" - runtime = "python3.11" + function_name = var.extract_lambda_name + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.extract_lambda_code.key + layers = [aws_lambda_layer_version.lambda_layer.arn] + role = aws_iam_role.multi_service_role.arn + handler = "extract_lambda.lambda_handler" + runtime = "python3.11" + source_code_hash = data.archive_file.extract_lambda_zip.output_base64sha256 + timeout = 180 lifecycle { create_before_destroy = true @@ -26,7 +79,10 @@ resource "aws_lambda_function" "extract_lambda" { depends_on = [aws_s3_object.extract_lambda_code] } -# Transform Lambda Function +############################# +# Transform Lambda Function # +############################# + data "archive_file" "transform_lambda_zip" { type = "zip" source_file = "${path.module}/../src/transform_lambda.py" @@ -40,12 +96,15 @@ resource "aws_s3_object" "transform_lambda_code" { } resource "aws_lambda_function" "transform_lambda" { - function_name = var.transform_lambda_name - s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = aws_s3_object.transform_lambda_code.key - role = aws_iam_role.multi_service_role.arn - handler = "transform_lambda.transform" - runtime = "python3.11" + function_name = var.transform_lambda_name + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.transform_lambda_code.key + layers = [aws_lambda_layer_version.lambda_layer.arn] + role = aws_iam_role.multi_service_role.arn + handler = "transform_lambda.lambda_handler" + runtime = "python3.11" + source_code_hash = data.archive_file.transform_lambda_zip.output_base64sha256 + timeout = 180 lifecycle { create_before_destroy = true @@ -54,7 +113,10 @@ resource "aws_lambda_function" "transform_lambda" { depends_on = [aws_s3_object.transform_lambda_code] } -# Load Lambda Function +######################## +# Load Lambda Function # +######################## + data "archive_file" "load_lambda_zip" { type = "zip" source_file = "${path.module}/../src/load_lambda.py" @@ -68,12 +130,15 @@ resource "aws_s3_object" "load_lambda_code" { } resource "aws_lambda_function" "load_lambda" { - function_name = var.load_lambda_name - s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = aws_s3_object.load_lambda_code.key - role = aws_iam_role.multi_service_role.arn - handler = "load_lambda.load" - runtime = "python3.11" + function_name = var.load_lambda_name + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.load_lambda_code.key + layers = [aws_lambda_layer_version.lambda_layer.arn] + role = aws_iam_role.multi_service_role.arn + handler = "load_lambda.lambda_handler" + runtime = "python3.11" + source_code_hash = data.archive_file.load_lambda_zip.output_base64sha256 + timeout = 180 lifecycle { create_before_destroy = true @@ -82,37 +147,3 @@ resource "aws_lambda_function" "load_lambda" { depends_on = [aws_s3_object.load_lambda_code] } -locals { - layer_dir = "${path.module}/.." - requirements = "${path.module}/../requirements.txt" - layer_zip = "${path.module}/../layer.zip" -} - -resource "null_resource" "prepare_layer" { - triggers = { - requirements_hash = filesha1(local.requirements) - } - provisioner "local-exec" { - command = <<EOT - mkdir -p ${local.layer_dir}/python/lib/python3.11/site-packages/ - pip install -r ${local.requirements} -t ${local.layer_dir}/python/lib/python3.11/site-packages/ - cd ${local.layer_dir} && zip -r ${local.layer_zip} . - EOT - } -} - -resource "aws_s3_object" "layer_zip" { - bucket = aws_s3_bucket.lambda_code_bucket.bucket - key = "layer.zip" - source = local.layer_zip - depends_on = [null_resource.prepare_layer] -} - -resource "aws_lambda_layer_version" "lambda_layer" { - layer_name = "lambda_layer" - compatible_runtimes = ["python3.11"] - s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = aws_s3_object.layer_zip.key - skip_destroy = true - depends_on = [aws_s3_object.layer_zip] -} diff --git a/terraform/main.tf b/terraform/main.tf index 310a251..6577b70 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -1,4 +1,5 @@ terraform { + required_version = ">= 1.8.0" required_providers { aws = { source = "hashicorp/aws" @@ -12,11 +13,16 @@ terraform { source = "hashicorp/archive" version = "~>2.5.0" } + random = { + source = "hashicorp/random" + version = "~>3.6.2" + } } backend "s3" { - bucket = "bentley-project-secrets" - key = "bentley-project/terraform.tfstate" - region = "eu-west-2" + bucket = "bentley-project-secrets" + key = "bentley-project/terraform.tfstate" + region = "eu-west-2" + encrypt = true } } @@ -24,11 +30,11 @@ provider "aws" { region = "eu-west-2" default_tags { tags = { - ProjectName = "Terrific-Totes" - Team = "Team-Bentley" - Environment = "Dev" - GitHubRepo = "de-project-bentley" + ProjectName = var.project_name + Environment = var.environment ManagedBy = "Terraform" + GitHubRepo = var.github_repo + Team = var.team_name } } } diff --git a/terraform/rds.tf b/terraform/rds.tf deleted file mode 100644 index a013fb3..0000000 --- a/terraform/rds.tf +++ /dev/null @@ -1,70 +0,0 @@ -# data "aws_availability_zones" "available" {} - -# module "vpc" { -# source = "terraform-aws-modules/vpc/aws" -# version = "5.12.1" - -# name = var.project_name -# cidr = "10.0.0.0/16" -# azs = data.aws_availability_zones.available.names -# public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"] -# enable_dns_hostnames = true -# enable_dns_support = true -# } - -# resource "aws_db_subnet_group" "Terrific-Totes-sub-gr" { -# name = "tt-db-subnet" -# subnet_ids = module.vpc.public_subnets - -# tags = { -# Name = "${var.project_name}" -# } -# } - -# resource "aws_security_group" "rds" { -# name = "${var.project_name}-rds" -# vpc_id = module.vpc.vpc_id - -# ingress { -# from_port = 5432 -# to_port = 5432 -# protocol = "tcp" -# cidr_blocks = ["0.0.0.0/0"] -# } - -# egress { -# from_port = 5432 -# to_port = 5432 -# protocol = "tcp" -# cidr_blocks = ["0.0.0.0/0"] -# } - -# tags = { -# Name = "${var.project_name}-rds" -# } -# } - -# resource "aws_db_parameter_group" "Terrific-Totes-param-gr" { -# name = "tt-db-param" -# family = "postgres14" - -# parameter { -# name = "log_connections" -# value = "1" -# } -# } - -# resource "aws_db_instance" "terrific-totes-rds" { -# db_name = var.project_name -# instance_class = "db.t3.micro" -# allocated_storage = 5 -# engine = "postgres" -# engine_version = "14.10" -# username = "" -# password = "" -# db_subnet_group_name = aws_db_subnet_group.Terrific-Totes-sub-gr.name -# vpc_security_group_ids = [aws_security_group.rds.id] -# parameter_group_name = aws_db_parameter_group.Terrific-Totes-param-gr.name -# publicly_accessible = false -# skip_final_snapshot = true -# } diff --git a/terraform/s3.tf b/terraform/s3.tf index d5cdee3..14e8835 100644 --- a/terraform/s3.tf +++ b/terraform/s3.tf @@ -1,14 +1,57 @@ -### EXTRACT BUCKET SET-UP +######################## +# EXTRACT BUCKET SETUP # +######################## + resource "aws_s3_bucket" "extract_bucket" { bucket_prefix = "${var.s3_extract_bucket_name}-" + force_destroy = true + tags = { + Name = "Ingestion Bucket" + } +} + +resource "aws_s3_bucket_versioning" "extract_bucket_versioning" { + bucket = aws_s3_bucket.extract_bucket.id + versioning_configuration { + status = "Enabled" + } } -### TRANSFORM BUCKET SET-UP +########################## +# TRANSFORM BUCKET SETUP # +########################## + resource "aws_s3_bucket" "transform_bucket" { bucket_prefix = "${var.s3_transform_bucket_name}-" + force_destroy = true + tags = { + Name = "Transform Bucket" + } +} + + +resource "aws_s3_bucket_versioning" "transform_bucket_versioning" { + bucket = aws_s3_bucket.transform_bucket.id + versioning_configuration { + status = "Enabled" + } } -### LAMBDA BUCKET +####################### +# LAMBDA BUCKET SETUP # +####################### + resource "aws_s3_bucket" "lambda_code_bucket" { bucket_prefix = "${var.s3_code_bucket_name}-" + force_destroy = true + tags = { + Name = "Lambda Bucket" + } +} + +resource "aws_s3_bucket_versioning" "lambda_bucket_versioning" { + bucket = aws_s3_bucket.lambda_code_bucket.id + versioning_configuration { + status = "Enabled" + } } diff --git a/terraform/vars.tf b/terraform/vars.tf index 3c88731..b3e3e47 100644 --- a/terraform/vars.tf +++ b/terraform/vars.tf @@ -33,6 +33,21 @@ variable "project_name" { default = "tt" } +variable "environment" { + type = string + default = "dev" +} + +variable "github_repo" { + type = string + default = "de-project-bentley" +} + +variable "team_name" { + type = string + default = "Team-Bentley" +} + data "aws_caller_identity" "current" {} data "aws_region" "current" {} diff --git a/test.py b/test.py deleted file mode 100644 index e69de29..0000000 --- a/test.py +++ /dev/null diff --git a/test/test_secrets_manager.py b/test/test_secrets_manager.py deleted file mode 100644 index 86533bc..0000000 --- a/test/test_secrets_manager.py +++ /dev/null @@ -1,34 +0,0 @@ -from src.secrets_manager import sm_client, create_secret, list_secret -import boto3 -from moto import mock_aws -import json -import pytest -import os - -pytest.fixture(scope='class') -def mock_aws_credentials(): - """Mocked AWS Credentials for moto.""" - os.environ["AWS_ACCESS_KEY_ID"] = "testing" - os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" - os.environ["AWS_SECURITY_TOKEN"] = "testing" - os.environ["AWS_SESSION_TOKEN"] = "testing" - os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" - -@pytest.fixture(scope='class') -def mock_sm_client(mock_aws_credentials): - with mock_aws(): - yield boto3.client('secretsmanager') - - -def test_create_secret_stores_secrets(mock_sm_client): - cohort_id = "test_cohort_id" - user = "test_user_id" - password = "test_password" - host = "test_host" - database = "test_database" - port = "test_port" - - secret_name = "test_secret" - response = create_secret(mock_sm_client, secret_name, cohort_id, user, password, host, database, port) - - assert response['Name'] == secret_name
\ No newline at end of file diff --git a/tests/dummy_identical.csv b/tests/dummy_identical.csv index fdd8993..e44e9fc 100644 --- a/tests/dummy_identical.csv +++ b/tests/dummy_identical.csv @@ -1,4 +1,4 @@ -Food_type,Flavour,Colour -Vegetable,Sour,Green -Berry,Sweet,Red +Food_type,Flavour,Colour,last_updated +Vegetable,Sour,Green,2022-11-03 14:20:49.962 +Berry,Sweet,Red,2022-11-03 14:20:49.962 diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index e94a8a4..548ce67 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -1,13 +1,25 @@ +import boto3.exceptions +import botocore.exceptions import pytest import boto3 from moto import mock_aws from unittest.mock import patch, MagicMock from unittest import TestCase -from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables -import os +import os import logging - -@pytest.fixture(scope='class') +import json +from src.extract_lambda import ( + list_existing_s3_files, + connect_to_database, + DBConnectionException, + lambda_handler, + process_and_upload_tables, + retrieve_secrets, + extract_bucket, +) + + +@pytest.fixture(scope="class") def mock_config(): env_vars = { "host": "abc", @@ -16,94 +28,220 @@ def mock_config(): "password": "password", "database": "db", } - with patch("src.extract_lambda.get_config", return_value=env_vars) as mock_config: + with patch( + "src.extract_lambda.retrieve_secrets", return_value=env_vars + ) as mock_config: yield mock_config -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def aws_credentials(): - os.environ["AWS_ACCESS_KEY_ID"] = 'testing' - os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing' - os.environ["AWS_SECURIT_TOKEN"] = 'testing' - os.environ["AWS_SESSION_TOKEN"] = 'testing' - os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2' + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURITY_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" + -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def s3_client(aws_credentials): with mock_aws(): - yield boto3.client('s3') + yield boto3.client("s3") + + +@pytest.fixture(scope="class") +def s3_mock_bucket(s3_client): + bucket = s3_client.create_bucket( + Bucket="extract_bucket", + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) + return bucket + + +class TestLambdaHandler: + def test_files_processed_and_uploaded_successfully(self, mocker): + mock_db = MagicMock() + mock_db.run.side_effect = [ + [["Fruits"]], + [["Vegetable", "Sour", "Green"], ["Berry", "Sweet", "Red"]], + [["Food_type"], ["Flavour"], ["Colour"]], + ] + mock_db.columns.return_value = [ + {"name": "Food_type"}, + {"name": "Flavour"}, + {"name": "Colour"}, + ] + with patch("src.extract_lambda.connect_to_database", return_value=mock_db): + mock_process_and_upload_tables = mocker.patch( + "src.extract_lambda.process_and_upload_tables", + return_value={ + "updated": ["Fruits"], + "no change": ["Vegetable", "Berry"], + }, + ) + mock_list_existing_s3_files = mocker.patch( + "src.extract_lambda.list_existing_s3_files", return_value={} + ) + event = {} + context = {} + response = lambda_handler(event, context) + assert response["statusCode"] == 200 + assert json.loads(response["body"]) == ( + "CSV files processed for Fruits and uploaded successfully." + "The following tables were not updated: Vegetable, Berry" + ) + mock_list_existing_s3_files.assert_called_once() + mock_process_and_upload_tables.assert_called_once_with(mock_db, {}) + mock_db.close.assert_called_once() + + def test_no_changes_detected_no_files_uploaded(self, mocker): + mock_db = MagicMock() + mock_db.run.side_effect = [ + [["Fruits"]], + [["Vegetable", "Sour", "Green"], ["Berry", "Sweet", "Red"]], + [["Food_type"], ["Flavour"], ["Colour"]], + ] + mock_db.columns.return_value = [ + {"name": "Food_type"}, + {"name": "Flavour"}, + {"name": "Colour"}, + ] + + with patch("src.extract_lambda.connect_to_database", return_value=mock_db): + mock_process_and_upload_tables = mocker.patch( + "src.extract_lambda.process_and_upload_tables", + return_value={"updated": [], "no change": ["Fruits"]}, + ) + mock_list_existing_s3_files = mocker.patch( + "src.extract_lambda.list_existing_s3_files", return_value={} + ) + event = {} + context = {} + response = lambda_handler(event, context) + assert response["statusCode"] == 200 + assert ( + json.loads(response["body"]) + == "No changes detected, no CSV files were uploaded." + ) + mock_list_existing_s3_files.assert_called_once() + mock_process_and_upload_tables.assert_called_once_with(mock_db, {}) + mock_db.close.assert_called_once() + + def test_exception_error(self, mocker): + with patch( + "src.extract_lambda.connect_to_database", + side_effect=Exception("Database connection error"), + ): + mock_process_and_upload_tables = mocker.patch( + "src.extract_lambda.process_and_upload_tables" + ) + mock_list_existing_s3_files = mocker.patch( + "src.extract_lambda.list_existing_s3_files" + ) + event = {} + context = {} + response = lambda_handler(event, context) + assert response["statusCode"] == 500 + assert json.loads(response["body"]) == "Internal server error." + mock_list_existing_s3_files.assert_not_called() + mock_process_and_upload_tables.assert_not_called() + + +class TestExtractBucket: + def test_extract_bucket_returns_bucket_name(self, s3_client, s3_mock_bucket): + result = extract_bucket(s3_client) + assert result == "extract_bucket" + + def test_bucket_returns_first_bucket(self, s3_client): + bucket1 = s3_client.create_bucket( + Bucket="bucket1", + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) + result = extract_bucket(s3_client) + assert result == "extract_bucket" + + def test_returns_index_error_if_no_buckets(self, s3_client): + s3_client.delete_bucket(Bucket="extract_bucket") + s3_client.delete_bucket(Bucket="bucket1") + + with pytest.raises(IndexError, match="list index out of range"): + extract_bucket(s3_client) + class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): - logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) list_existing_s3_files(client=s3_client) - assert 'Error listing S3 objects' in caplog.text - - def test_error_if_bucket_is_empty(self, s3_client, caplog): - - s3_client.create_bucket(Bucket='extract_bucket', - CreateBucketConfiguration={ - 'LocationConstraint': 'eu-west-2' - }) - list_existing_s3_files(client=s3_client) - assert 'The bucket is empty' in caplog.text - - def test_error_retrieving_object(self, s3_client, caplog): - s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt') - list_existing_s3_files(bucket_name='test_bucket', client=s3_client) + assert "Error listing S3 objects" in caplog.text - assert 'Error retrieving S3 object ' in caplog.text + def test_error_if_bucket_is_empty(self, s3_client, caplog, s3_mock_bucket): + list_existing_s3_files("extract_bucket", client=s3_client) + assert "The bucket is empty" in caplog.text - def test_retrieves_file_content(self, s3_client, caplog): - result = list_existing_s3_files(client=s3_client) + def test_retrieves_file_content(self, s3_client, caplog, s3_mock_bucket): + s3_client.upload_file("tests/dummy.txt", "extract_bucket", "dummy.txt") + result = list_existing_s3_files("extract_bucket", client=s3_client) + assert list(result.values()) == ["This is a test file."] - assert list(result.values()) == ['This is a test file.'] class TestConnectToDatabase: def test_connect_to_database(mock_conn, mock_config): - with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: + with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: connect_to_database() mock_conn.assert_called_with( - host="abc", user="def", port="5432", password="password", database="db" + host="abc", user="def", port="5432", password="password", database="db" ) - def test_database_error(self, mock_config): + def test_database_error(self, mock_config): # had mock_config in param with pytest.raises(DBConnectionException): connect_to_database() def test_logs_interface_error(self, caplog): logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) with pytest.raises(DBConnectionException): connect_to_database() - assert 'Interface error' in caplog.text -''' + assert "Interface error" in caplog.text + + class TestProcessAndUploadTables: - def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog): - logger = logging.getLogger() - logger.info('Testing now.') - caplog.set_level(logging.ERROR) - #### - queries = ["SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';", - "SELECT * FROM Fruits;", - "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits'"] - return_values = [[['Fruits']], - [['Vegetable','Sour','Green'],['Berry','Sweet','Red']], - [['Food_type'],['Flavour'],['Colour']]] - vals = dict(zip(queries,return_values)) - - #### - with patch('src.extract_lambda.connect_to_database') as mock_db: - mock_db().run.side_effects = return_values - s3_key = 'Fruits/2024/08/15/Fruits_16:46:30.csv' - existing_files = {s3_key: 'Food_type,Flavour,Colour\nFruit,Sour,Green\nBerry,Sweet,Red'} - s3_client.create_bucket(Bucket='extract_bucket', - CreateBucketConfiguration={'LocationConstraint': 'eu-west-2'}) - s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key) + def test_error_process_and_upload_tables(self, mock_conn, s3_client, caplog): + caplog.set_level(logging.INFO) + + # Mock return values for database queries + queries = [ + "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';", + "SELECT * FROM Fruits WHERE last_updated > :latest;", + "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits';", + ] + return_values = [ + [["Fruits"]], + [], # No new rows with a more recent last_updated timestamp + [["Food_type"], ["Flavour"], ["Colour"], ["last_updated"]], + ] + vals = dict(zip(queries, return_values)) + + # Patch the database connection and set return values for queries + with patch("src.extract_lambda.Connection") as mock_db: + mock_db().run.side_effect = return_values + s3_key = "Fruits/2024/08/15/Fruits_16:46:30.csv" + existing_files = { + s3_key: "Food_type,Flavour,Colour,last_updated\nVegetable,Sour,Green,2022-11-03 14:20:49.962\nBerry,Sweet,Red,2022-11-03 14:20:49.962" + } + + # Simulate S3 bucket and file setup + s3_client.create_bucket( + Bucket="test_extract_bucket", + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) + s3_client.upload_file( + "tests/dummy_identical.csv", "test_extract_bucket", s3_key + ) + + # Run the process_and_upload_tables function process_and_upload_tables(mock_db(), existing_files, client=s3_client) - assert 'No new data.' in caplog.text -'''
\ No newline at end of file + # Assert that the log contains "No new data" + assert "No new data" in caplog.text diff --git a/tests/test_secrets_manager.py b/tests/test_secrets_manager.py index a30be86..609c572 100644 --- a/tests/test_secrets_manager.py +++ b/tests/test_secrets_manager.py @@ -3,10 +3,11 @@ import boto3 import botocore.exceptions from moto import mock_aws import json -import pytest +import pytest import os -@pytest.fixture(scope='function') + +@pytest.fixture(scope="function") def aws_credentials(): """Mocked AWS Credentials for moto.""" os.environ["AWS_ACCESS_KEY_ID"] = "testing" @@ -15,12 +16,14 @@ def aws_credentials(): os.environ["AWS_SESSION_TOKEN"] = "testing" os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" -@pytest.fixture(scope='function') + +@pytest.fixture(scope="function") def mock_sm_client(aws_credentials): with mock_aws(): yield boto3.client("secretsmanager") -@pytest.fixture(scope='function') + +@pytest.fixture(scope="function") def mock_store_secret(mock_sm_client): secret = { "cohort_id": "test_cohort_id", @@ -28,15 +31,18 @@ def mock_store_secret(mock_sm_client): "password": "test_password", "host": "test_host", "database": "test_database", - "port": "test_port" + "port": "test_port", } secret_name = "test_secret" - response = mock_sm_client.create_secret(Name=secret_name, SecretString=json.dumps(secret)) + response = mock_sm_client.create_secret( + Name=secret_name, SecretString=json.dumps(secret) + ) return response + def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret): secret_name = "test_secret" @@ -44,8 +50,10 @@ def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret) assert isinstance(result, dict) -def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_store_secret): +def test_retrieves_secrets_returns_correct_keys_and_values( + mock_sm_client, mock_store_secret +): secret_name = "test_secret" result = retrieve_secrets(mock_sm_client, secret_name) @@ -57,17 +65,20 @@ def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_ assert result["database"] == "test_database" assert result["port"] == "test_port" -def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type(mock_sm_client): - secret_name = [1, 2, 3] +def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type( + mock_sm_client, +): + secret_name = [1, 2, 3] with pytest.raises(botocore.exceptions.ParamValidationError) as error: retrieve_secrets(mock_sm_client, secret_name) -def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist(mock_sm_client, mock_store_secret): - secret_name = 'test_secret_2' - +def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist( + mock_sm_client, mock_store_secret +): + secret_name = "test_secret_2" with pytest.raises(botocore.exceptions.ClientError) as error: - retrieve_secrets(mock_sm_client, secret_name)
\ No newline at end of file + retrieve_secrets(mock_sm_client, secret_name) |
