aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.deepsource.toml27
-rw-r--r--.github/workflows/deploy.yml13
-rw-r--r--.github/workflows/on-commit.yml50
-rw-r--r--.gitignore1
-rw-r--r--DEVNOTES.md100
-rw-r--r--Makefile80
-rw-r--r--README.md52
-rw-r--r--src/extract_lambda.py196
-rw-r--r--src/load_lambda.py2
-rw-r--r--src/secrets_manager.py31
-rw-r--r--src/transform_lambda.py2
-rw-r--r--terraform/lambda.tf78
-rw-r--r--terraform/main.tf8
-rw-r--r--terraform/rds.tf128
-rw-r--r--terraform/s3.tf5
-rw-r--r--test/test_secrets_manager.py19
-rw-r--r--tests/test_extract_lambda.py6
-rw-r--r--tests/test_secrets_manager.py84
18 files changed, 531 insertions, 351 deletions
diff --git a/.deepsource.toml b/.deepsource.toml
new file mode 100644
index 0000000..a840b78
--- /dev/null
+++ b/.deepsource.toml
@@ -0,0 +1,27 @@
+version = 1
+
+[[analyzers]]
+name = "sql"
+
+[[analyzers]]
+name = "terraform"
+
+[[analyzers]]
+name = "python"
+
+ [analyzers.meta]
+ runtime_version = "3.x.x"
+
+[[analyzers]]
+name = "secrets"
+
+[[transformers]]
+name = "black"
+
+[[transformers]]
+name = "autopep8"
+
+[[transformers]]
+name = "ruff"
+
+
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 372d0b3..5672048 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -1,19 +1,24 @@
name: deploy-terraform
on:
+ pull_request:
+ branches:
+ - main
push:
branches:
- - test-ci/** # Adjust the branch based on our deployment strategy
+ - main
+
jobs:
deploy-terraform:
name: Deploy Terraform
runs-on: ubuntu-latest
- environment: test-env
+ #needs: run-checks (must ref on-commit.yml file)
+ environment: production
steps:
- name: Checkout Repo
uses: actions/checkout@v4
-
+
- name: Install Terraform
uses: hashicorp/setup-terraform@v3
@@ -34,4 +39,4 @@ jobs:
- name: Terraform Apply
working-directory: terraform
- run: terraform apply --auto-approve \ No newline at end of file
+ run: terraform apply --auto-approve
diff --git a/.github/workflows/on-commit.yml b/.github/workflows/on-commit.yml
deleted file mode 100644
index fd9ffb8..0000000
--- a/.github/workflows/on-commit.yml
+++ /dev/null
@@ -1,50 +0,0 @@
-name: commit-qc-checks
-
-on:
- push:
- branches-ignore:
- - 'main'
-
-jobs:
- python-quality-checks:
- runs-on: ubuntu-latest
- steps:
- - uses : actions/checkout@v4
- - name : 'Python: Setup'
- uses : actions/setup-python@v5
- with:
- python-version: 3.11
- - name : 'Python: Install Dependencies'
- run: |
- python -m pip install --upgrade pip
- pip install flake8 pylint black bandit safety
- continue-on-error: true
- - name : 'Python: Linting'
- run: |
- flake8 .
- find . -name "*.py" | xargs pylint
- continue-on-error: true
- - name : 'Python: Formatting'
- run: |
- black --check .
- continue-on-error: true
- terraform-quality-checks:
- runs-on: ubuntu-latest
- steps:
- - uses : actions/checkout@v4
- - name: 'Terraform: Setup'
- uses: hashicorp/setup-terraform@v3
- with:
- terraform_version: latest
- - name: 'Terraform: Formatting'
- working-directory: terraform
- run: terraform fmt -check -recursive
- continue-on-error: true
- - name: 'Terraform: Initialise'
- working-directory: terraform
- run: terraform init -backend=false
- continue-on-error: true
- - name: 'Terraform: Validate'
- working-directory: terraform
- run: terraform validate
- continue-on-error: true \ No newline at end of file
diff --git a/.gitignore b/.gitignore
index ca15434..6aa03fc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,4 @@ __pycache__/
# OS-Related Files
.DS_Store
+venv \ No newline at end of file
diff --git a/DEVNOTES.md b/DEVNOTES.md
deleted file mode 100644
index 00b4ddd..0000000
--- a/DEVNOTES.md
+++ /dev/null
@@ -1,100 +0,0 @@
-# Workflow
-
-## References
-
-https://nvie.com/posts/a-successful-git-branching-model/ \
-https://learn.microsoft.com/en-us/azure/devops/repos/git/merging-with-squash?view=azure-devops
-
-
-## Branching
-
-*Based off GitFlow but slightly modified*
-
-- There are two main branches
- - `main` - production-ready code
- - `development` - integration branch for features
- - `staging` - represents the current staging state
-- In addition, there are additional branches
- - Feature branches - for new features and non-urgent bugfixes
- - Hotfix branches - probably won't be used but for critical bugs in production (this is what testing should prevent)
- - Release branches - for preparation of production releases
-
-- Feature branches - e.g. `feature/short-description`
-- Bugfix branches - e.g. `bugfix/short-description`
-- Hotfix branches - e.g. `hotfix/short-description`
-- Release branches - e.g. `release/vX.Y.Z`
-
-### Examples
-```
-feature/add-data-extractor
-bugfix/fix-s3-upload-error
-hotfix/security-patch
-release/v1.0.0
-```
-
-## Environments
-
-1. Development - where active development and initial testing occur
-2. Staging - for integration testing and final checks before production
-3. Production - live and stable environment
-
-## Deployment
-
-1. `main` - represents the current production state
-2. `develop` - represents the integration branch for features and non-urgent fixes
-3. `staging` - represents the current staging state
-
-## Staging Flow
-
-1. Create feature branches from `develop` & merge completed features back into `develop`
-2. When the `develop` branch is ready for testing, create a `staging` branch from `develop`
-3. Deploy the `staging` branch to the staging environment and perform our unit-tests
-4. If staging tests pass, create a `release/vX.Y.Z` branch from `staging`
-5. Make any final adjustments in the `release/vX.Y.Z` branch
-6. Once we have approved the changes in the `release/vX.Y.Z` branch, merge into `main`
-7. Tag the release in `main`
-
-### Notes
-
-- No new features should be included in the release branches and any new features should be merged into `develop` for the next release cycle
-
-## Commit Messages
-
-Please follow the conventional commits specification:
-
-```
-<type>[optional scope]: <description>
-
-<optional body>
-
-[optional footer(s)]
-```
-
-### Types
-- feat: new features
-- fix: bugfixes
-- docs: documentation-only changes
-- style: changes that do not affect the meaning of the code
-- refactor: code changes that neither fix bugs nor adds features
-- perf: code changes that improve performance
-- test: adding tests or correcting existing tests
-- chore: changes to build process or tools/libraries (probably not needed)
-- infra: changes to infrastructure configuration (e.g. Terraform)
-
-### Examples
-```
-feat(extract): add automatic scheduling for data ingestion
-docs: update README with project setup instructions
-```
-
-Configuration files for things such as Terraform isn't native to Conventional Commits, but we can add our own:
-
-```
-infra(tf): update S3 bucket policy
-```
-
-If the Terraform change involves a fix, you may combine `fix` and `infra`:
-
-```
-fix(infra): ...
-```
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..077cd98
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,80 @@
+##############################################
+# #
+# MAKEFILE TO BUILD THE PROJECT #
+# #
+##############################################
+
+PROJECT_NAME = de-project-bentley
+REGION = eu-west-2
+PYTHON_INTERPRETER = python
+WD=$(shell pwd)
+PYTHONPATH=${WD}
+SHELL := /bin/bash
+PROFILE = default
+PIP:=pip
+
+## PYTHON INTERPRETER ENVIRONMENT
+create-environment:
+ @echo ">>> About to create environment: $(PROJECT_NAME)..."
+ @echo ">>> check python3 version"
+ ( \
+ $(PYTHON_INTERPRETER) --version; \
+ )
+ @echo ">>> Setting up VirtualEnv."
+ ( \
+ $(PIP) install -q virtualenv virtualenvwrapper; \
+ virtualenv venv --python=$(PYTHON_INTERPRETER); \
+ )
+
+ACTIVATE_ENV := source venv/bin/activate
+
+# Execute python related functionalities from within the project's environment
+define execute_in_env
+ $(ACTIVATE_ENV) && $1
+endef
+
+## Build the environment requirements
+requirements: create-environment
+ $(call execute_in_env, $(PIP) install -r ./requirements.txt)
+
+# Set Up
+## Install bandit
+bandit:
+ $(call execute_in_env, $(PIP) install bandit)
+
+## Install safety
+safety:
+ $(call execute_in_env, $(PIP) install safety)
+
+## Install black
+black:
+ $(call execute_in_env, $(PIP) install black)
+
+## Install coverage
+coverage:
+ $(call execute_in_env, $(PIP) install coverage)
+
+## Set up dev requirements (bandit, safety, black)
+dev-setup: bandit safety black coverage
+
+# Build / Run
+
+## Run the security test (bandit + safety)
+security-test:
+ $(call execute_in_env, safety check -r ./requirements.txt)
+ $(call execute_in_env, bandit -lll */*.py *c/*/*.py)
+
+## Run the black code check
+run-black:
+ $(call execute_in_env, black ./src/*/*.py ./test/*/*.py)
+
+## Run the unit tests
+unit-test:
+ $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest -v)
+
+## Run the coverage check
+check-coverage:
+ $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest --cov=src test/)
+
+## Run all checks
+run-checks: security-test run-black unit-test check-coverage
diff --git a/README.md b/README.md
index 8ae0cb3..cbb446c 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,51 @@
-# de-project-bentley \ No newline at end of file
+# ToteSys - Data Engineering Project
+
+[![Python](https://img.shields.io/badge/Python-FFD43B?style=for-the-badge&logo=python&logoColor=blue)](https://www.python.org/)
+[![AWS](https://img.shields.io/badge/Amazon_AWS-FF9900?style=for-the-badge&logo=amazonaws&logoColor=white)](https://aws.amazon.com/)
+[![Terraform](https://img.shields.io/badge/Terraform-7B42BC?style=for-the-badge&logo=terraform&logoColor=white)](https://www.terraform.io/)
+[![Postgresql](https://img.shields.io/badge/PostgreSQL-316192?style=for-the-badge&logo=postgresql&logoColor=white)](https://www.postgresql.org/)
+[![GitHub Actions](https://img.shields.io/badge/GitHub_Actions-2088FF?style=for-the-badge&logo=github-actions&logoColor=white)](https://github.com/features/actions)
+
+[![Terraform Main Deployment Workflow Status](https://img.shields.io/github/actions/workflow/status/ajschofield/de-project-bentley/deploy.yml?branch=main&style=flat-square&label=deploy)](https://github.com/ajschofield/de-project-bentley/actions/workflows/deploy.yml?query=branch%3Amain)
+[![Production Environment Status](https://img.shields.io/github/deployments/ajschofield/de-project-bentley/production?style=flat-square&label=env)](https://github.com/ajschofield/de-project-bentley/deployments/production)
+# Summary
+The project aims to implement a data platform that can extract data from an
+operational database, archive it in a data lake, and make it easily accessible
+within a remodelled OLAP data warehouse.
+
+The solution showcases our skills in:
+
+- Python
+- PostgreSQL
+- Database modelling
+- Amazon Web Services (AWS)
+- Agile methodologies
+
+# Main Objective
+
+Our goal is to create a reliable ETL (Extract, Transform, Load) pipeline that
+can:
+
+1. Extract the data from the `totesys` operational database
+2. Store the data in AWS S3 buckets, that will form our data lake
+3. Transform the data into a suitable schema for the data warehouse
+4. Load the transformed data into the data warehouse hosted on AWS
+
+# Key Features
+
+We aim for the project to have certain features. Some are more prioritised than
+others.
+
+- [ ] Automated data ingestion from `totesys` db
+- [ ] Data storage for ingested and processed data in S3 buckets
+- [ ] Data transformation for data warehouse schema
+- [ ] Automated data loading into the data warehouse schema
+- [ ] Logging and monitoring with CloudWatch
+- [ ] Notifications for errors and successful runs (e.g. successful ingestion)
+- [ ] Visualisation of warehouse data
+
+# Test Coverage
+TBA
+
+# Contributors
+TBA \ No newline at end of file
diff --git a/src/extract_lambda.py b/src/extract_lambda.py
index fb2d7e8..4168e27 100644
--- a/src/extract_lambda.py
+++ b/src/extract_lambda.py
@@ -1,5 +1,4 @@
-from pg8000.native import Connection, DatabaseError, InterfaceError
-from dotenv import dotenv_values
+from pg8000.native import Connection, InterfaceError, identifier
import boto3
import csv
from botocore.exceptions import ClientError
@@ -12,6 +11,8 @@ import re
logger = logging.getLogger()
logger.setLevel(logging.INFO)
+# DB Exception class
+
class DBConnectionException(Exception):
"""Wraps pg8000.native Error or DatabaseError."""
@@ -21,136 +22,167 @@ class DBConnectionException(Exception):
self.message = str(e)
super().__init__(self.message)
+
def lambda_handler(event, context):
"""This lambda function connects to the Totesys database, lists the contents of the ingestion bucket,
- and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them
- it uses 3 helper functions to achieve these 3 functionalities
+ and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them
+ it uses 3 helper functions to achieve these 3 functionalities
"""
try:
db = connect_to_database()
existing_files = list_existing_s3_files()
any_changes = process_and_upload_tables(db, existing_files)
-
- if not any_changes:
+
+ if not any_changes["updated"]:
logger.info("No changes detected in the database.")
return {
- 'statusCode': 200,
- 'body': json.dumps('No changes detected, no CSV files were uploaded.')
+ "statusCode": 200,
+ "body": json.dumps("No changes detected, no CSV files were uploaded."),
}
else:
return {
- 'statusCode': 200,
- 'body': json.dumps('CSV files processed and uploaded successfully.')
+ "statusCode": 200,
+ "body": json.dumps(
+ f"""CSV files processed for {', '.join(any_changes['updated'])} and uploaded successfully.{
+ 'The following tables were not updated: '+', '.join(any_changes['no change']) if any_changes['no change'] else ''}"""
+ ),
}
-
except Exception as e:
- logger.error(f'Error: {e}')
- return {
- 'statusCode': 500,
- 'body': json.dumps('Internal server error.')
- }
-
+ logger.error(f"Error: {e}")
+ return {"statusCode": 500, "body": json.dumps("Internal server error.")}
finally:
-
if db:
db.close()
-def get_config(path: str = ".env") -> dict:
- return dotenv_values(path)
+
+def retrieve_secrets(
+ sm_client=boto3.client("secretsmanager"), secret_name="bentley-secrets"
+):
+ try:
+ response = sm_client.get_secret_value(SecretId=secret_name)
+ if "SecretString" in response:
+ secret = json.loads(response["SecretString"])
+ return secret
+ except ClientError as e:
+ logger.error(f"Could not retrieve secrets: {e}")
+ raise e
def connect_to_database() -> Connection:
try:
- config = get_config()
- host = config["host"]
- port = config["port"]
- user = config["user"]
- password = config["password"]
- database = config["database"]
+ secrets = retrieve_secrets()
+ host = secrets["host"]
+ port = secrets["port"]
+ user = secrets["user"]
+ password = secrets["password"]
+ database = secrets["database"]
return Connection(
- database=database,
- user=user,
- password=password,
- host=host,
- port=port
+ database=database, user=user, password=password, host=host, port=port
)
except InterfaceError as i:
- logger.error(f'Interface error: {i}')
+ logger.error(f"Interface error: {i}")
raise DBConnectionException("Failed to connect to database")
+def extract_bucket(client=boto3.client("s3")):
+ response = client.list_buckets()
+ extract_bucket_filter = [
+ bucket["Name"] for bucket in response["Buckets"] if "extract" in bucket["Name"]
+ ]
+ return extract_bucket_filter[0]
-def list_existing_s3_files(bucket_name='extract_bucket', client=boto3.client('s3')):
- """Creates a dictionary and populates it with the
- results of listing the contents of the s3 bucket, then
- returns the populated dictionary
+
+def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client("s3")):
+ """Creates a dictionary and populates it with the
+ results of listing the contents of the s3 bucket, then
+ returns the populated dictionary
"""
-
+
existing_files = {}
-
+
try:
- response = client.list_objects_v2(Bucket='extract_bucket')
-
- if 'Contents' in response:
- for obj in response['Contents']:
- s3_key = obj['Key']
+ response = client.list_objects_v2(Bucket=bucket_name)
+
+ if "Contents" in response:
+ for obj in response["Contents"]:
+ s3_key = obj["Key"]
try:
file_obj = client.get_object(Bucket=bucket_name, Key=s3_key)
- file_content = file_obj['Body'].read().decode('utf-8')
+ file_content = file_obj["Body"].read().decode("utf-8")
existing_files[s3_key] = file_content
except ClientError as e:
- logger.error(f'Error retrieving S3 object {s3_key}: {e}')
+ logger.error(f"Error retrieving S3 object {s3_key}: {e}")
else:
- logger.error('The bucket is empty')
-
+ logger.error("The bucket is empty")
+
except ClientError as e:
- logger.error(f'Error listing S3 objects: {e}')
-
- return existing_files
+ logger.error(f"Error listing S3 objects: {e}")
+ return existing_files
-def process_and_upload_tables(db, existing_files, client=boto3.client('s3')):
- """Creates a list of the tables from a database query and
- then selects everything from each table in individual queries
- it then writes each table to CSV files and compares with the item
- in the existing_files dictionary with the same name. If it finds any changes
- to files, or new tables/files it uploads them to the s3 bucket
+def process_and_upload_tables(db, existing_files, client=boto3.client("s3")):
+ """Creates a list of the tables from a database query and
+ then selects everything from each table in individual queries
+ it then writes each table to CSV files and compares with the item
+ in the existing_files dictionary with the same name. If it finds any changes
+ to files, or new tables/files it uploads them to the s3 bucket
"""
- ## NEW CODE
+ load_status = {"updated": [], "no change": []}
+ # Retrieving the latest file timestamp from S3 extract bucket
all_datetimes = []
for file_names in existing_files.keys():
- datetime_str_on_s3 = ''.join(re.search(r'\/(.+/).+_(.+)\.csv',file_names).group(1,2))
- all_datetimes.append(datetime.strptime(datetime_str_on_s3, '%Y/%m/%d/%H:%M:%S'))
+ datetime_str_on_s3 = "".join(
+ re.search(r"\/(.+/).+_(.+)\.csv", file_names).group(1, 2)
+ )
+ all_datetimes.append(datetime.strptime(datetime_str_on_s3, "%Y/%m/%d/%H:%M:%S"))
latest_timestamp = max(all_datetimes)
- ## END OF NEW CODE
- tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';")
- print(tables)
+ # Iterating through tables on the database and retrieving only latest changes vs previous file load
+ tables = db.run(
+ """
+ SELECT table_name
+ FROM information_schema.tables
+ WHERE table_schema='public' AND table_type='BASE TABLE';"""
+ )
for table in tables:
table_name = table[0]
- rows = db.run(f"SELECT * FROM {table_name};")
-
-
- csv_file_path = f"/tmp/{table_name}.csv"
- with open(csv_file_path, "w", newline='') as file:
- writer = csv.writer(file)
- #column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")]
- column_names = [col_name[0] for col_name in db.run(f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';")]
- writer.writerow(column_names)
- writer.writerows(rows)
- s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv')
- new_csv_content = open(csv_file_path, "r").read()
- ## NEW CODE
- latest_s3_object_key = datetime.strftime(latest_timestamp,f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv')
- ## END OF NEW CODE
- if existing_files[latest_s3_object_key] != new_csv_content:
+ rows = db.run(
+ f"SELECT * FROM {identifier(table_name)} " "WHERE last_updated >= :latest;",
+ latest={datetime.strftime(latest_timestamp, "%H-%m-%d %H:%M:%S")},
+ )
+
+ # Creating a temporary file path and writing the column name to it followed by each row of data
+ if rows:
+ csv_file_path = f"/tmp/{table_name}.csv"
+ with open(csv_file_path, "w", newline="") as file:
+ writer = csv.writer(file)
+ # column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")]
+ column_names = [
+ col_name[0]
+ for col_name in db.run(
+ """SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS
+ WHERE table_name = :table ;""",
+ table=table_name,
+ )
+ ]
+ writer.writerow(column_names)
+ writer.writerows(rows)
+ s3_key = datetime.strftime(
+ datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv"
+ )
+
+ # Writing the new file to S3 extract bucket:
try:
- client.upload_file(csv_file_path, 'extract_bucket', s3_key)
+ client.upload_file(csv_file_path, extract_bucket(), s3_key)
+ load_status["updated"].append(table_name)
logger.info(f"Uploaded {s3_key} to S3.")
except ClientError as e:
- logger.error(f'Error uploading to S3: {e}')
+ logger.error(f"Error uploading to S3: {e}")
else:
- logger.info(f"No new data.")
- \ No newline at end of file
+ load_status["no change"].append(table_name)
+ logger.info(
+ f"No new data in {table_name} name. Latest data retrieved is from {latest_timestamp}."
+ )
+ return load_status
diff --git a/src/load_lambda.py b/src/load_lambda.py
index 6ee681f..c6a8e60 100644
--- a/src/load_lambda.py
+++ b/src/load_lambda.py
@@ -1,2 +1,2 @@
def lambda_handler():
- pass \ No newline at end of file
+ pass
diff --git a/src/secrets_manager.py b/src/secrets_manager.py
index c0fb61e..3484688 100644
--- a/src/secrets_manager.py
+++ b/src/secrets_manager.py
@@ -4,45 +4,46 @@ import json
def sm_client():
- sm_client = boto3.client('secretsmanager')
+ sm_client = boto3.client("secretsmanager")
yield sm_client
-def create_secret(sm_client, secret_name, cohort_id, user, password, host, database, port):
+
+def create_secret(
+ sm_client, secret_name, cohort_id, user, password, host, database, port
+):
secret = {
"cohort_id": cohort_id,
"user": user,
"password": password,
"host": host,
"database": database,
- "port": port
+ "port": port,
}
response = sm_client.create_secret(
- Name = secret_name,
- SecretString = json.dumps(secret)
+ Name=secret_name, SecretString=json.dumps(secret)
)
print(response)
return response
+
def list_secret(sm_client):
response = sm_client.list_secrets()
- secret_dict = response['SecretList']
+ secret_dict = response["SecretList"]
secret_names = []
for items in secret_dict:
- secret_names.append(items['Name'])
- print(f'{len(secret_names)} secret(s) available')
+ secret_names.append(items["Name"])
+ print(f"{len(secret_names)} secret(s) available")
for name in secret_names:
print(name)
return secret_names
-def retrieve_secrets(sm_client):
- response = sm_client.get_secrets(
-
- )
+def retrieve_secrets(sm_client):
+ response = sm_client.get_secrets()
-#retrieve secret
-#so lambda can access totesy db
-#so lambda connect to the db and then retrieve the data \ No newline at end of file
+# retrieve secret
+# so lambda can access totesy db
+# so lambda connect to the db and then retrieve the data
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index 6ee681f..c6a8e60 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,2 +1,2 @@
def lambda_handler():
- pass \ No newline at end of file
+ pass
diff --git a/terraform/lambda.tf b/terraform/lambda.tf
index 72d1306..e33bc79 100644
--- a/terraform/lambda.tf
+++ b/terraform/lambda.tf
@@ -12,12 +12,14 @@ resource "aws_s3_object" "extract_lambda_code" {
}
resource "aws_lambda_function" "extract_lambda" {
- function_name = var.extract_lambda_name
- s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket
- s3_key = aws_s3_object.extract_lambda_code.key
- role = aws_iam_role.multi_service_role.arn
- handler = "extract_lambda.extract"
- runtime = "python3.11"
+ function_name = var.extract_lambda_name
+ s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket
+ s3_key = aws_s3_object.extract_lambda_code.key
+ layers = [aws_lambda_layer_version.lambda_layer.arn]
+ role = aws_iam_role.multi_service_role.arn
+ handler = "extract_lambda.lambda_handler"
+ runtime = "python3.11"
+ source_code_hash = data.archive_file.extract_lambda_zip.output_base64sha256
lifecycle {
create_before_destroy = true
@@ -40,12 +42,14 @@ resource "aws_s3_object" "transform_lambda_code" {
}
resource "aws_lambda_function" "transform_lambda" {
- function_name = var.transform_lambda_name
- s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket
- s3_key = aws_s3_object.transform_lambda_code.key
- role = aws_iam_role.multi_service_role.arn
- handler = "transform_lambda.transform"
- runtime = "python3.11"
+ function_name = var.transform_lambda_name
+ s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket
+ s3_key = aws_s3_object.transform_lambda_code.key
+ layers = [aws_lambda_layer_version.lambda_layer.arn]
+ role = aws_iam_role.multi_service_role.arn
+ handler = "transform_lambda.lambda_handler"
+ runtime = "python3.11"
+ source_code_hash = data.archive_file.transform_lambda_zip.output_base64sha256
lifecycle {
create_before_destroy = true
@@ -68,12 +72,14 @@ resource "aws_s3_object" "load_lambda_code" {
}
resource "aws_lambda_function" "load_lambda" {
- function_name = var.load_lambda_name
- s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket
- s3_key = aws_s3_object.load_lambda_code.key
- role = aws_iam_role.multi_service_role.arn
- handler = "load_lambda.load"
- runtime = "python3.11"
+ function_name = var.load_lambda_name
+ s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket
+ s3_key = aws_s3_object.load_lambda_code.key
+ layers = [aws_lambda_layer_version.lambda_layer.arn]
+ role = aws_iam_role.multi_service_role.arn
+ handler = "load_lambda.lambda_handler"
+ runtime = "python3.11"
+ source_code_hash = data.archive_file.load_lambda_zip.output_base64sha256
lifecycle {
create_before_destroy = true
@@ -81,3 +87,39 @@ resource "aws_lambda_function" "load_lambda" {
depends_on = [aws_s3_object.load_lambda_code]
}
+
+# Lambda Layer Specification
+locals {
+ layer_dir = "lambda_layer"
+ requirements = "requirements.txt"
+ layer_zip = "layer.zip"
+ layer_name = "lambda_layer_dev"
+}
+
+resource "null_resource" "prepare_layer" {
+ provisioner "local-exec" {
+ command = <<EOT
+ cd ${local.layer_dir}
+ rm -rf python
+ mkdir python
+ pip3 install -r ${local.requirements} -t python/
+ zip -r ${local.layer_zip} python
+ EOT
+ } #removed / at the end of python in line 99
+}
+
+resource "aws_s3_object" "lambda_layer_zip" {
+ bucket = aws_s3_bucket.lambda_code_bucket.id #bucket instead of id
+ key = "lambda_layer/${local.layer_name}/${local.layer_zip}"
+ source = "${local.layer_dir}/${local.layer_zip}"
+ depends_on = [null_resource.prepare_layer]
+}
+
+resource "aws_lambda_layer_version" "lambda_layer" {
+ layer_name = local.layer_name
+ compatible_runtimes = ["python3.11"]
+ s3_bucket = aws_s3_bucket.lambda_layer_bucket.id #bucket instead of id
+ s3_key = aws_s3_object.lambda_layer_zip.key
+ skip_destroy = true
+ depends_on = [aws_s3_object.lambda_layer_zip]
+}
diff --git a/terraform/main.tf b/terraform/main.tf
index 3b06701..310a251 100644
--- a/terraform/main.tf
+++ b/terraform/main.tf
@@ -4,6 +4,14 @@ terraform {
source = "hashicorp/aws"
version = "~>5.0"
}
+ null = {
+ source = "hashicorp/null"
+ version = "~>3.2.2"
+ }
+ archive = {
+ source = "hashicorp/archive"
+ version = "~>2.5.0"
+ }
}
backend "s3" {
bucket = "bentley-project-secrets"
diff --git a/terraform/rds.tf b/terraform/rds.tf
index 88783b7..a013fb3 100644
--- a/terraform/rds.tf
+++ b/terraform/rds.tf
@@ -1,80 +1,70 @@
-data "aws_availability_zones" "available" {}
+# data "aws_availability_zones" "available" {}
-module "vpc" {
- source = "terraform-aws-modules/vpc/aws"
- version = "5.12.1"
+# module "vpc" {
+# source = "terraform-aws-modules/vpc/aws"
+# version = "5.12.1"
- name = var.project_name
- cidr = "10.0.0.0/16"
- azs = data.aws_availability_zones.available.names
- public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"]
- enable_dns_hostnames = true
- enable_dns_support = true
-}
+# name = var.project_name
+# cidr = "10.0.0.0/16"
+# azs = data.aws_availability_zones.available.names
+# public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"]
+# enable_dns_hostnames = true
+# enable_dns_support = true
+# }
-resource "aws_db_subnet_group" "Terrific-Totes-sub-gr" {
- name = "tt-db-subnet"
- subnet_ids = module.vpc.public_subnets
+# resource "aws_db_subnet_group" "Terrific-Totes-sub-gr" {
+# name = "tt-db-subnet"
+# subnet_ids = module.vpc.public_subnets
- tags = {
- Name = "${var.project_name}"
- }
-}
+# tags = {
+# Name = "${var.project_name}"
+# }
+# }
-resource "aws_security_group" "rds" {
- name = "${var.project_name}-rds"
- vpc_id = module.vpc.vpc_id
+# resource "aws_security_group" "rds" {
+# name = "${var.project_name}-rds"
+# vpc_id = module.vpc.vpc_id
- ingress {
- from_port = 5432
- to_port = 5432
- protocol = "tcp"
- cidr_blocks = ["0.0.0.0/0"]
- }
+# ingress {
+# from_port = 5432
+# to_port = 5432
+# protocol = "tcp"
+# cidr_blocks = ["0.0.0.0/0"]
+# }
- egress {
- from_port = 5432
- to_port = 5432
- protocol = "tcp"
- cidr_blocks = ["0.0.0.0/0"]
- }
+# egress {
+# from_port = 5432
+# to_port = 5432
+# protocol = "tcp"
+# cidr_blocks = ["0.0.0.0/0"]
+# }
- tags = {
- Name = "${var.project_name}-rds"
- }
-}
+# tags = {
+# Name = "${var.project_name}-rds"
+# }
+# }
-resource "aws_db_parameter_group" "Terrific-Totes-param-gr" {
- name = "tt-db-param"
- family = "postgres14"
+# resource "aws_db_parameter_group" "Terrific-Totes-param-gr" {
+# name = "tt-db-param"
+# family = "postgres14"
- parameter {
- name = "log_connections"
- value = "1"
- }
-}
+# parameter {
+# name = "log_connections"
+# value = "1"
+# }
+# }
-resource "aws_db_instance" "terrific-totes-rds" {
- db_name = var.project_name
- instance_class = "db.t3.micro"
- allocated_storage = 5
- engine = "postgres"
- engine_version = "14.10"
- username = "totes"
- password = "totes123"
- # username = "user credentials for the root user" # we could use .env here
- # password = "user password for the root user" # we could use .env here
- ### alternatively to providing username nad password we can specify:
- # resource "aws_kms_key" "example_key" {
- # description = "Example KMS Key"
- # }
- # within the resource:
- # manage_master_user_password = true
- # master_user_secret_kms_key_id = aws_kms_key.example.key_id
- # }
- db_subnet_group_name = aws_db_subnet_group.Terrific-Totes-sub-gr.name
- vpc_security_group_ids = [aws_security_group.rds.id]
- parameter_group_name = aws_db_parameter_group.Terrific-Totes-param-gr.name
- publicly_accessible = false
- skip_final_snapshot = true
-}
+# resource "aws_db_instance" "terrific-totes-rds" {
+# db_name = var.project_name
+# instance_class = "db.t3.micro"
+# allocated_storage = 5
+# engine = "postgres"
+# engine_version = "14.10"
+# username = ""
+# password = ""
+# db_subnet_group_name = aws_db_subnet_group.Terrific-Totes-sub-gr.name
+# vpc_security_group_ids = [aws_security_group.rds.id]
+# parameter_group_name = aws_db_parameter_group.Terrific-Totes-param-gr.name
+# publicly_accessible = false
+# skip_final_snapshot = true
+# }
diff --git a/terraform/s3.tf b/terraform/s3.tf
index d5cdee3..b3a863c 100644
--- a/terraform/s3.tf
+++ b/terraform/s3.tf
@@ -12,3 +12,8 @@ resource "aws_s3_bucket" "transform_bucket" {
resource "aws_s3_bucket" "lambda_code_bucket" {
bucket_prefix = "${var.s3_code_bucket_name}-"
}
+
+### LAMBDA LAYER BUCKET
+resource "aws_s3_bucket" "lambda_layer_bucket" {
+ bucket_prefix = "lambda-layer-dev-"
+} \ No newline at end of file
diff --git a/test/test_secrets_manager.py b/test/test_secrets_manager.py
index 86533bc..cb4ec15 100644
--- a/test/test_secrets_manager.py
+++ b/test/test_secrets_manager.py
@@ -2,10 +2,12 @@ from src.secrets_manager import sm_client, create_secret, list_secret
import boto3
from moto import mock_aws
import json
-import pytest
+import pytest
import os
-pytest.fixture(scope='class')
+pytest.fixture(scope="class")
+
+
def mock_aws_credentials():
"""Mocked AWS Credentials for moto."""
os.environ["AWS_ACCESS_KEY_ID"] = "testing"
@@ -14,10 +16,11 @@ def mock_aws_credentials():
os.environ["AWS_SESSION_TOKEN"] = "testing"
os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
-@pytest.fixture(scope='class')
+
+@pytest.fixture(scope="class")
def mock_sm_client(mock_aws_credentials):
with mock_aws():
- yield boto3.client('secretsmanager')
+ yield boto3.client("secretsmanager")
def test_create_secret_stores_secrets(mock_sm_client):
@@ -29,6 +32,8 @@ def test_create_secret_stores_secrets(mock_sm_client):
port = "test_port"
secret_name = "test_secret"
- response = create_secret(mock_sm_client, secret_name, cohort_id, user, password, host, database, port)
-
- assert response['Name'] == secret_name \ No newline at end of file
+ response = create_secret(
+ mock_sm_client, secret_name, cohort_id, user, password, host, database, port
+ )
+
+ assert response["Name"] == secret_name
diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py
index 67cb6d3..7707cbf 100644
--- a/tests/test_extract_lambda.py
+++ b/tests/test_extract_lambda.py
@@ -3,6 +3,9 @@ import boto3
from moto import mock_aws
from unittest.mock import patch, MagicMock
from unittest import TestCase
+import os
+import logging
+import json
from src.extract_lambda import (
list_existing_s3_files,
connect_to_database,
@@ -10,9 +13,6 @@ from src.extract_lambda import (
lambda_handler,
process_and_upload_tables,
)
-import os
-import logging
-import json
@pytest.fixture(scope="class")
diff --git a/tests/test_secrets_manager.py b/tests/test_secrets_manager.py
new file mode 100644
index 0000000..609c572
--- /dev/null
+++ b/tests/test_secrets_manager.py
@@ -0,0 +1,84 @@
+from src.secrets_manager import sm_client, retrieve_secrets
+import boto3
+import botocore.exceptions
+from moto import mock_aws
+import json
+import pytest
+import os
+
+
+@pytest.fixture(scope="function")
+def aws_credentials():
+ """Mocked AWS Credentials for moto."""
+ os.environ["AWS_ACCESS_KEY_ID"] = "testing"
+ os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
+ os.environ["AWS_SECURITY_TOKEN"] = "testing"
+ os.environ["AWS_SESSION_TOKEN"] = "testing"
+ os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
+
+
+@pytest.fixture(scope="function")
+def mock_sm_client(aws_credentials):
+ with mock_aws():
+ yield boto3.client("secretsmanager")
+
+
+@pytest.fixture(scope="function")
+def mock_store_secret(mock_sm_client):
+ secret = {
+ "cohort_id": "test_cohort_id",
+ "user": "test_user_id",
+ "password": "test_password",
+ "host": "test_host",
+ "database": "test_database",
+ "port": "test_port",
+ }
+
+ secret_name = "test_secret"
+
+ response = mock_sm_client.create_secret(
+ Name=secret_name, SecretString=json.dumps(secret)
+ )
+
+ return response
+
+
+def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret):
+ secret_name = "test_secret"
+
+ result = retrieve_secrets(mock_sm_client, secret_name)
+
+ assert isinstance(result, dict)
+
+
+def test_retrieves_secrets_returns_correct_keys_and_values(
+ mock_sm_client, mock_store_secret
+):
+ secret_name = "test_secret"
+
+ result = retrieve_secrets(mock_sm_client, secret_name)
+
+ assert result["cohort_id"] == "test_cohort_id"
+ assert result["user"] == "test_user_id"
+ assert result["password"] == "test_password"
+ assert result["host"] == "test_host"
+ assert result["database"] == "test_database"
+ assert result["port"] == "test_port"
+
+
+def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type(
+ mock_sm_client,
+):
+ secret_name = [1, 2, 3]
+
+ with pytest.raises(botocore.exceptions.ParamValidationError) as error:
+ retrieve_secrets(mock_sm_client, secret_name)
+
+
+def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist(
+ mock_sm_client, mock_store_secret
+):
+ secret_name = "test_secret_2"
+
+ with pytest.raises(botocore.exceptions.ClientError) as error:
+ retrieve_secrets(mock_sm_client, secret_name)
git.ajschof.me — hosted by ajschofield — powered by cgit