aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlex <git@ajschof.me>2024-08-16 15:37:46 +0100
committerGitHub <noreply@github.com>2024-08-16 15:37:46 +0100
commit9d189d7c8c4fae09ece780521ab9c10935be8aaf (patch)
tree03938087ec6c91a744ea54eae056dea0eece549e
parente153f2072eafca2c83a84e2c4210c46a40dabaf4 (diff)
parenteb3353ea0df1eee2df2c6a9a9efad121e56e1790 (diff)
downloadde-project-bentley-9d189d7c8c4fae09ece780521ab9c10935be8aaf.tar.gz
de-project-bentley-9d189d7c8c4fae09ece780521ab9c10935be8aaf.zip
Merge branch 'development' into extract_lambda_refactoring
-rw-r--r--.deepsource.toml27
-rw-r--r--.github/workflows/deploy.yml13
-rw-r--r--.github/workflows/on-commit.yml50
-rw-r--r--DEVNOTES.md100
-rw-r--r--Makefile80
-rw-r--r--README.md44
6 files changed, 159 insertions, 155 deletions
diff --git a/.deepsource.toml b/.deepsource.toml
new file mode 100644
index 0000000..a840b78
--- /dev/null
+++ b/.deepsource.toml
@@ -0,0 +1,27 @@
+version = 1
+
+[[analyzers]]
+name = "sql"
+
+[[analyzers]]
+name = "terraform"
+
+[[analyzers]]
+name = "python"
+
+ [analyzers.meta]
+ runtime_version = "3.x.x"
+
+[[analyzers]]
+name = "secrets"
+
+[[transformers]]
+name = "black"
+
+[[transformers]]
+name = "autopep8"
+
+[[transformers]]
+name = "ruff"
+
+
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 372d0b3..5672048 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -1,19 +1,24 @@
name: deploy-terraform
on:
+ pull_request:
+ branches:
+ - main
push:
branches:
- - test-ci/** # Adjust the branch based on our deployment strategy
+ - main
+
jobs:
deploy-terraform:
name: Deploy Terraform
runs-on: ubuntu-latest
- environment: test-env
+ #needs: run-checks (must ref on-commit.yml file)
+ environment: production
steps:
- name: Checkout Repo
uses: actions/checkout@v4
-
+
- name: Install Terraform
uses: hashicorp/setup-terraform@v3
@@ -34,4 +39,4 @@ jobs:
- name: Terraform Apply
working-directory: terraform
- run: terraform apply --auto-approve \ No newline at end of file
+ run: terraform apply --auto-approve
diff --git a/.github/workflows/on-commit.yml b/.github/workflows/on-commit.yml
deleted file mode 100644
index fd9ffb8..0000000
--- a/.github/workflows/on-commit.yml
+++ /dev/null
@@ -1,50 +0,0 @@
-name: commit-qc-checks
-
-on:
- push:
- branches-ignore:
- - 'main'
-
-jobs:
- python-quality-checks:
- runs-on: ubuntu-latest
- steps:
- - uses : actions/checkout@v4
- - name : 'Python: Setup'
- uses : actions/setup-python@v5
- with:
- python-version: 3.11
- - name : 'Python: Install Dependencies'
- run: |
- python -m pip install --upgrade pip
- pip install flake8 pylint black bandit safety
- continue-on-error: true
- - name : 'Python: Linting'
- run: |
- flake8 .
- find . -name "*.py" | xargs pylint
- continue-on-error: true
- - name : 'Python: Formatting'
- run: |
- black --check .
- continue-on-error: true
- terraform-quality-checks:
- runs-on: ubuntu-latest
- steps:
- - uses : actions/checkout@v4
- - name: 'Terraform: Setup'
- uses: hashicorp/setup-terraform@v3
- with:
- terraform_version: latest
- - name: 'Terraform: Formatting'
- working-directory: terraform
- run: terraform fmt -check -recursive
- continue-on-error: true
- - name: 'Terraform: Initialise'
- working-directory: terraform
- run: terraform init -backend=false
- continue-on-error: true
- - name: 'Terraform: Validate'
- working-directory: terraform
- run: terraform validate
- continue-on-error: true \ No newline at end of file
diff --git a/DEVNOTES.md b/DEVNOTES.md
deleted file mode 100644
index 00b4ddd..0000000
--- a/DEVNOTES.md
+++ /dev/null
@@ -1,100 +0,0 @@
-# Workflow
-
-## References
-
-https://nvie.com/posts/a-successful-git-branching-model/ \
-https://learn.microsoft.com/en-us/azure/devops/repos/git/merging-with-squash?view=azure-devops
-
-
-## Branching
-
-*Based off GitFlow but slightly modified*
-
-- There are three main branches
- - `main` - production-ready code
- - `development` - integration branch for features
- - `staging` - represents the current staging state
-- In addition, there are additional branches
- - Feature branches - for new features and non-urgent bugfixes
- - Hotfix branches - probably won't be used but for critical bugs in production (this is what testing should prevent)
- - Release branches - for preparation of production releases
-
-- Feature branches - e.g. `feature/short-description`
-- Bugfix branches - e.g. `bugfix/short-description`
-- Hotfix branches - e.g. `hotfix/short-description`
-- Release branches - e.g. `release/vX.Y.Z`
-
-### Examples
-```
-feature/add-data-extractor
-bugfix/fix-s3-upload-error
-hotfix/security-patch
-release/v1.0.0
-```
-
-## Environments
-
-1. Development - where active development and initial testing occur
-2. Staging - for integration testing and final checks before production
-3. Production - live and stable environment
-
-## Deployment
-
-1. `main` - represents the current production state
-2. `development` - represents the integration branch for features and non-urgent fixes
-3. `staging` - represents the current staging state
-
-## Staging Flow
-
-1. Create feature branches from `development` & merge completed features back into `development`
-2. When the `development` branch is ready for testing, create a `staging` branch from `development`
-3. Deploy the `staging` branch to the staging environment and perform our unit-tests
-4. If staging tests pass, create a `release/vX.Y.Z` branch from `staging`
-5. Make any final adjustments in the `release/vX.Y.Z` branch
-6. Once we have approved the changes in the `release/vX.Y.Z` branch, merge into `main`
-7. Tag the release in `main`
-
-### Notes
-
-- No new features should be included in the release branches; any new features should be merged into `development` for the next release cycle
-
-## Commit Messages
-
-Please follow the conventional commits specification:
-
-```
-<type>[optional scope]: <description>
-
-<optional body>
-
-[optional footer(s)]
-```
-
-### Types
-- feat: new features
-- fix: bugfixes
-- docs: documentation-only changes
-- style: changes that do not affect the meaning of the code
-- refactor: code changes that neither fix bugs nor add features
-- perf: code changes that improve performance
-- test: adding tests or correcting existing tests
-- chore: changes to build process or tools/libraries (probably not needed)
-- infra: changes to infrastructure configuration (e.g. Terraform)
-
-### Examples
-```
-feat(extract): add automatic scheduling for data ingestion
-docs: update README with project setup instructions
-```
-
-A type for infrastructure configuration (e.g. Terraform) isn't part of the Conventional Commits specification, but we can add our own:
-
-```
-infra(tf): update S3 bucket policy
-```
-
-If the Terraform change involves a fix, you may combine `fix` and `infra`:
-
-```
-fix(infra): ...
-```
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..077cd98
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,80 @@
+##############################################
+# #
+# MAKEFILE TO BUILD THE PROJECT #
+# #
+##############################################
+
+PROJECT_NAME = de-project-bentley
+REGION = eu-west-2
+PYTHON_INTERPRETER = python
+WD=$(shell pwd)
+PYTHONPATH=${WD}
+SHELL := /bin/bash
+PROFILE = default
+PIP:=pip
+
+## PYTHON INTERPRETER ENVIRONMENT
+create-environment:
+ @echo ">>> About to create environment: $(PROJECT_NAME)..."
+ @echo ">>> check python3 version"
+ ( \
+ $(PYTHON_INTERPRETER) --version; \
+ )
+ @echo ">>> Setting up VirtualEnv."
+ ( \
+ $(PIP) install -q virtualenv virtualenvwrapper; \
+ virtualenv venv --python=$(PYTHON_INTERPRETER); \
+ )
+
+ACTIVATE_ENV := source venv/bin/activate
+
+# Execute python related functionalities from within the project's environment
+define execute_in_env
+ $(ACTIVATE_ENV) && $1
+endef
+
+## Build the environment requirements
+requirements: create-environment
+ $(call execute_in_env, $(PIP) install -r ./requirements.txt)
+
+# Set Up
+## Install bandit
+bandit:
+ $(call execute_in_env, $(PIP) install bandit)
+
+## Install safety
+safety:
+ $(call execute_in_env, $(PIP) install safety)
+
+## Install black
+black:
+ $(call execute_in_env, $(PIP) install black)
+
+## Install coverage
+coverage:
+ $(call execute_in_env, $(PIP) install coverage)
+
+## Set up dev requirements (bandit, safety, black, coverage)
+dev-setup: bandit safety black coverage
+
+# Build / Run
+
+## Run the security test (bandit + safety)
+security-test:
+ $(call execute_in_env, safety check -r ./requirements.txt)
+ $(call execute_in_env, bandit -lll */*.py *c/*/*.py)
+
+## Run the black code check
+run-black:
+ $(call execute_in_env, black ./src/*/*.py ./test/*/*.py)
+
+## Run the unit tests
+unit-test:
+ $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest -v)
+
+## Run the coverage check
+check-coverage:
+ $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest --cov=src test/)
+
+## Run all checks
+run-checks: security-test run-black unit-test check-coverage
diff --git a/README.md b/README.md
index 8ae0cb3..6bc75dc 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,43 @@
-# de-project-bentley \ No newline at end of file
+# ToteSys - Data Engineering Project
+
+# Summary
+The project aims to implement a data platform that can extract data from an
+operational database, archive it in a data lake, and make it easily accessible
+within a remodelled OLAP data warehouse.
+
+The solution showcases our skills in:
+
+- Python
+- PostgreSQL
+- Database modelling
+- Amazon Web Services (AWS)
+- Agile methodologies
+
+# Main Objective
+
+Our goal is to create a reliable ETL (Extract, Transform, Load) pipeline that
+can:
+
+1. Extract the data from the `totesys` operational database
+2. Store the data in AWS S3 buckets, which will form our data lake
+3. Transform the data into a suitable schema for the data warehouse
+4. Load the transformed data into the data warehouse hosted on AWS
+
+# Key Features
+
+We aim for the project to have certain features. Some are more prioritised than
+others.
+
+- [ ] Automated data ingestion from `totesys` db
+- [ ] Data storage for ingested and processed data in S3 buckets
+- [ ] Data transformation for data warehouse schema
+- [ ] Automated data loading into the data warehouse schema
+- [ ] Logging and monitoring with CloudWatch
+- [ ] Notifications for errors and successful runs (e.g. successful ingestion)
+- [ ] Visualisation of warehouse data
+
+# Test Coverage
+TBA
+
+# Contributors
+TBA \ No newline at end of file
git.ajschof.me — hosted by ajschofield — powered by cgit