From c937a7e098d818dadbc769b3c9eb9fd93cc05af2 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 10:01:28 +0100 Subject: docs: rm DEVNOTES.md basically redundant now --- DEVNOTES.md | 100 ------------------------------------------------------------ 1 file changed, 100 deletions(-) delete mode 100644 DEVNOTES.md diff --git a/DEVNOTES.md b/DEVNOTES.md deleted file mode 100644 index 00b4ddd..0000000 --- a/DEVNOTES.md +++ /dev/null @@ -1,100 +0,0 @@ -# Workflow - -## References - -https://nvie.com/posts/a-successful-git-branching-model/ \ -https://learn.microsoft.com/en-us/azure/devops/repos/git/merging-with-squash?view=azure-devops - - -## Branching - -*Based off GitFlow but slightly modified* - -- There are two main branches - - `main` - production-ready code - - `development` - integration branch for features - - `staging` - represents the current staging state -- In addition, there are additional branches - - Feature branches - for new features and non-urgent bugfixes - - Hotfix branches - probably won't be used but for critical bugs in production (this is what testing should prevent) - - Release branches - for preparation of production releases - -- Feature branches - e.g. `feature/short-description` -- Bugfix branches - e.g. `bugfix/short-description` -- Hotfix branches - e.g. `hotfix/short-description` -- Release branches - e.g. `release/vX.Y.Z` - -### Examples -``` -feature/add-data-extractor -bugfix/fix-s3-upload-error -hotfix/security-patch -release/v1.0.0 -``` - -## Environments - -1. Development - where active development and initial testing occur -2. Staging - for integration testing and final checks before production -3. Production - live and stable environment - -## Deployment - -1. `main` - represents the current production state -2. `develop` - represents the integration branch for features and non-urgent fixes -3. `staging` - represents the current staging state - -## Staging Flow - -1. Create feature branches from `develop` & merge completed features back into `develop` -2. When the `develop` branch is ready for testing, create a `staging` branch from `develop` -3. Deploy the `staging` branch to the staging environment and perform our unit-tests -4. If staging tests pass, create a `release/vX.Y.Z` branch from `staging` -5. Make any final adjustments in the `release/vX.Y.Z` branch -6. Once we have approved the changes in the `release/vX.Y.Z` branch, merge into `main` -7. Tag the release in `main` - -### Notes - -- No new features should be included in the release branches and any new features should be merged into `develop` for the next release cycle - -## Commit Messages - -Please follow the conventional commits specification: - -``` -[optional scope]: - - - -[optional footer(s)] -``` - -### Types -- feat: new features -- fix: bugfixes -- docs: documentation-only changes -- style: changes that do not affect the meaning of the code -- refactor: code changes that neither fix bugs nor adds features -- perf: code changes that improve performance -- test: adding tests or correcting existing tests -- chore: changes to build process or tools/libraries (probably not needed) -- infra: changes to infrastructure configuration (e.g. Terraform) - -### Examples -``` -feat(extract): add automatic scheduling for data ingestion -docs: update README with project setup instructions -``` - -Configuration files for things such as Terraform isn't native to Conventional Commits, but we can add our own: - -``` -infra(tf): update S3 bucket policy -``` - -If the Terraform change involves a fix, you may combine `fix` and `infra`: - -``` -fix(infra): ... -``` -- cgit v1.2.3 From 861fd5fe8303c6558c7763477c89dc98fff23c57 Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Fri, 16 Aug 2024 10:20:14 +0100 Subject: wip: pushing the ci-cd-branch to test terraform infra --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 372d0b3..922daee 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -3,7 +3,7 @@ name: deploy-terraform on: push: branches: - - test-ci/** # Adjust the branch based on our deployment strategy + - ci-cd-branch # Adjust the branch based on our deployment strategy jobs: deploy-terraform: -- cgit v1.2.3 From d25f05ba140cb85847ca604bef0e68b76a17ba62 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 10:34:50 +0100 Subject: docs: add draft summary section --- README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8ae0cb3..203482e 100644 --- a/README.md +++ b/README.md @@ -1 +1,14 @@ -# de-project-bentley \ No newline at end of file +# ToteSys - Data Engineering Project + +# Summary +The project aims to implement a data platform that can extract data from an +operational database, archive it in a data lake, and make it easily accessible +within a remodelled OLAP data warehouse. + +The solution showcases our skills in: + +- Python +- PostgreSQL +- Database modelling +- Amazon Web Services (AWS) +- Agile methodologies \ No newline at end of file -- cgit v1.2.3 From 9809e7ca1351d7b27f62b3c7c74db7124cab5dc9 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 10:40:00 +0100 Subject: docs: add draft main objective section --- README.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 203482e..e55cb16 100644 --- a/README.md +++ b/README.md @@ -11,4 +11,14 @@ The solution showcases our skills in: - PostgreSQL - Database modelling - Amazon Web Services (AWS) -- Agile methodologies \ No newline at end of file +- Agile methodologies + +# Main Objective + +Our goal is to create a reliable ETL (Extract, Transform, Load) pipeline that +can: + +1. Extract the data from the `totesys` operational database +2. Store the data in AWS S3 buckets, that will form our data lake +3. Transform the data into a suitable schema for the data warehouse +4. Load the data into the data warehouse hosted on AWS \ No newline at end of file -- cgit v1.2.3 From 37eb3bb7974904614867c7d0c2d4f6eccb39f22e Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 10:41:01 +0100 Subject: docs(main_obj): clarify data being loaded into data warehouse --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e55cb16..9c7baee 100644 --- a/README.md +++ b/README.md @@ -21,4 +21,4 @@ can: 1. Extract the data from the `totesys` operational database 2. Store the data in AWS S3 buckets, that will form our data lake 3. Transform the data into a suitable schema for the data warehouse -4. Load the data into the data warehouse hosted on AWS \ No newline at end of file +4. Load the transformed data into the data warehouse hosted on AWS \ No newline at end of file -- cgit v1.2.3 From 67a3caf058416718e9413520cb74be049af1e93e Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 11:09:59 +0100 Subject: docs: add draft key features section --- README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9c7baee..0bf6b9d 100644 --- a/README.md +++ b/README.md @@ -21,4 +21,17 @@ can: 1. Extract the data from the `totesys` operational database 2. Store the data in AWS S3 buckets, that will form our data lake 3. Transform the data into a suitable schema for the data warehouse -4. Load the transformed data into the data warehouse hosted on AWS \ No newline at end of file +4. Load the transformed data into the data warehouse hosted on AWS + +# Key Features + +We aim for the project to have certain features. Some are more prioritised than +others. + +- [ ] Automated data ingestion from `totesys` db +- [ ] Data storage for ingested and processed data in S3 buckets +- [ ] Data transformation for data warehouse schema +- [ ] Automated data loading into the data warehouse schema +- [ ] Logging and monitoring with CloudWatch +- [ ] Notifications for errors and successful runs (e.g. successful ingestion) +- [ ] Visualisation of warehouse data \ No newline at end of file -- cgit v1.2.3 From 3d56751d93eeb5ef6cef1f44dd54ee38fcd1fe3c Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Fri, 16 Aug 2024 12:20:54 +0100 Subject: wip: change env line 14 to production --- .github/workflows/deploy.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 922daee..bd9df57 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -5,15 +5,17 @@ on: branches: - ci-cd-branch # Adjust the branch based on our deployment strategy + jobs: deploy-terraform: name: Deploy Terraform runs-on: ubuntu-latest - environment: test-env + #needs: run-checks (must ref on-commit.yml file) + environment: production steps: - name: Checkout Repo uses: actions/checkout@v4 - + - name: Install Terraform uses: hashicorp/setup-terraform@v3 -- cgit v1.2.3 From 9ad481989e7033735815df7c2fe7a277433587a6 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 16 Aug 2024 12:36:46 +0100 Subject: ci: create .deepsource.toml for automated commit checking --- .deepsource.toml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .deepsource.toml diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 0000000..a5002ab --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,25 @@ +version = 1 + +[[analyzers]] +name = "sql" + +[[analyzers]] +name = "terraform" + +[[analyzers]] +name = "python" + + [analyzers.meta] + runtime_version = "3.x.x" + +[[analyzers]] +name = "secrets" + +[[transformers]] +name = "black" + +[[transformers]] +name = "autopep8" + +[[transformers]] +name = "ruff" -- cgit v1.2.3 From ba82306f1646215f17b55099fd6342bca2a35c97 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 16 Aug 2024 12:42:48 +0100 Subject: ci: update .deepsource.toml --- .deepsource.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.deepsource.toml b/.deepsource.toml index a5002ab..b435c8b 100644 --- a/.deepsource.toml +++ b/.deepsource.toml @@ -17,9 +17,14 @@ name = "secrets" [[transformers]] name = "black" +enabled = "true" [[transformers]] name = "autopep8" +enabled = "true" [[transformers]] name = "ruff" +enabled = "true" + + -- cgit v1.2.3 From b3aa2d97e4844b6c205fc9b85427f8d5f150388a Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 16 Aug 2024 12:48:06 +0100 Subject: fix(ci): fix .deepsource.toml config --- .deepsource.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.deepsource.toml b/.deepsource.toml index b435c8b..a840b78 100644 --- a/.deepsource.toml +++ b/.deepsource.toml @@ -17,14 +17,11 @@ name = "secrets" [[transformers]] name = "black" -enabled = "true" [[transformers]] name = "autopep8" -enabled = "true" [[transformers]] name = "ruff" -enabled = "true" -- cgit v1.2.3 From a217da60ba75a226bf72a9fc680c4cbabe883aea Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 12:53:22 +0100 Subject: docs: add empty sections --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0bf6b9d..6bc75dc 100644 --- a/README.md +++ b/README.md @@ -34,4 +34,10 @@ others. - [ ] Automated data loading into the data warehouse schema - [ ] Logging and monitoring with CloudWatch - [ ] Notifications for errors and successful runs (e.g. successful ingestion) -- [ ] Visualisation of warehouse data \ No newline at end of file +- [ ] Visualisation of warehouse data + +# Test Coverage +TBA + +# Contributors +TBA \ No newline at end of file -- cgit v1.2.3 From e97ab6b46f181db107b7a640f386f5f57480347c Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Fri, 16 Aug 2024 14:16:03 +0100 Subject: add makefile in root: not in use currently --- Makefile | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..077cd98 --- /dev/null +++ b/Makefile @@ -0,0 +1,80 @@ +############################################## +# # +# MAKEFILE TO BUILD THE PROJECT # +# # +############################################## + +PROJECT_NAME = de-project-bentley +REGION = eu-west-2 +PYTHON_INTERPRETER = python +WD=$(shell pwd) +PYTHONPATH=${WD} +SHELL := /bin/bash +PROFILE = default +PIP:=pip + +## PYTHON INTERPRETER ENVIRONMENT +create-environment: + @echo ">>> About to create environment: $(PROJECT_NAME)..." + @echo ">>> check python3 version" + ( \ + $(PYTHON_INTERPRETER) --version; \ + ) + @echo ">>> Setting up VirtualEnv." + ( \ + $(PIP) install -q virtualenv virtualenvwrapper; \ + virtualenv venv --python=$(PYTHON_INTERPRETER); \ + ) + +ACTIVATE_ENV := source venv/bin/activate + +# Execute python related functionalities from within the project's environment +define execute_in_env + $(ACTIVATE_ENV) && $1 +endef + +## Build the environment requirements +requirements: create-environment + $(call execute_in_env, $(PIP) install -r ./requirements.txt) + +# Set Up +## Install bandit +bandit: + $(call execute_in_env, $(PIP) install bandit) + +## Install safety +safety: + $(call execute_in_env, $(PIP) install safety) + +## Install black +black: + $(call execute_in_env, $(PIP) install black) + +## Install coverage +coverage: + $(call execute_in_env, $(PIP) install coverage) + +## Set up dev requirements (bandit, safety, black) +dev-setup: bandit safety black coverage + +# Build / Run + +## Run the security test (bandit + safety) +security-test: + $(call execute_in_env, safety check -r ./requirements.txt) + $(call execute_in_env, bandit -lll */*.py *c/*/*.py) + +## Run the black code check +run-black: + $(call execute_in_env, black ./src/*/*.py ./test/*/*.py) + +## Run the unit tests +unit-test: + $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest -v) + +## Run the coverage check +check-coverage: + $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest --cov=src test/) + +## Run all checks +run-checks: security-test run-black unit-test check-coverage -- cgit v1.2.3 From 2bcedc300f36760b55f0db8cfb4e724362d1c251 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 14:27:41 +0100 Subject: chore(ci): remove redundant on-commit.yml --- .github/workflows/on-commit.yml | 50 ----------------------------------------- 1 file changed, 50 deletions(-) delete mode 100644 .github/workflows/on-commit.yml diff --git a/.github/workflows/on-commit.yml b/.github/workflows/on-commit.yml deleted file mode 100644 index fd9ffb8..0000000 --- a/.github/workflows/on-commit.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: commit-qc-checks - -on: - push: - branches-ignore: - - 'main' - -jobs: - python-quality-checks: - runs-on: ubuntu-latest - steps: - - uses : actions/checkout@v4 - - name : 'Python: Setup' - uses : actions/setup-python@v5 - with: - python-version: 3.11 - - name : 'Python: Install Dependencies' - run: | - python -m pip install --upgrade pip - pip install flake8 pylint black bandit safety - continue-on-error: true - - name : 'Python: Linting' - run: | - flake8 . - find . -name "*.py" | xargs pylint - continue-on-error: true - - name : 'Python: Formatting' - run: | - black --check . - continue-on-error: true - terraform-quality-checks: - runs-on: ubuntu-latest - steps: - - uses : actions/checkout@v4 - - name: 'Terraform: Setup' - uses: hashicorp/setup-terraform@v3 - with: - terraform_version: latest - - name: 'Terraform: Formatting' - working-directory: terraform - run: terraform fmt -check -recursive - continue-on-error: true - - name: 'Terraform: Initialise' - working-directory: terraform - run: terraform init -backend=false - continue-on-error: true - - name: 'Terraform: Validate' - working-directory: terraform - run: terraform validate - continue-on-error: true \ No newline at end of file -- cgit v1.2.3 From cf3d366e730e88ceea194d5b3b1d1a3ddecdd944 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 14:30:07 +0100 Subject: ci: deploy only on push/pr to main --- .github/workflows/deploy.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index bd9df57..db51d20 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -1,10 +1,13 @@ name: deploy-terraform on: - push: + pull-request: branches: - - ci-cd-branch # Adjust the branch based on our deployment strategy - + - main + pull: + branches: + - main + jobs: deploy-terraform: @@ -36,4 +39,4 @@ jobs: - name: Terraform Apply working-directory: terraform - run: terraform apply --auto-approve \ No newline at end of file + run: terraform apply --auto-approve -- cgit v1.2.3 From 63b5f3e5f1888d5653d2f7b3529b3d72e3315dbf Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 14:43:46 +0100 Subject: fix(ci): amend pull_request syntax --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index db51d20..00c7263 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -1,7 +1,7 @@ name: deploy-terraform on: - pull-request: + pull_request: branches: - main pull: -- cgit v1.2.3 From 9cec304b2f8c2832c4a715bba784a34f7c674c19 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 14:52:35 +0100 Subject: fix(ci): amend pull to push --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 00c7263..5672048 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -4,7 +4,7 @@ on: pull_request: branches: - main - pull: + push: branches: - main -- cgit v1.2.3