diff options
| author | Alex <git@ajschof.me> | 2024-08-16 15:37:46 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-08-16 15:37:46 +0100 |
| commit | 9d189d7c8c4fae09ece780521ab9c10935be8aaf (patch) | |
| tree | 03938087ec6c91a744ea54eae056dea0eece549e | |
| parent | e153f2072eafca2c83a84e2c4210c46a40dabaf4 (diff) | |
| parent | eb3353ea0df1eee2df2c6a9a9efad121e56e1790 (diff) | |
| download | de-project-bentley-9d189d7c8c4fae09ece780521ab9c10935be8aaf.tar.gz de-project-bentley-9d189d7c8c4fae09ece780521ab9c10935be8aaf.zip | |
Merge branch 'development' into extract_lambda_refactoring
| -rw-r--r-- | .deepsource.toml | 27 | ||||
| -rw-r--r-- | .github/workflows/deploy.yml | 13 | ||||
| -rw-r--r-- | .github/workflows/on-commit.yml | 50 | ||||
| -rw-r--r-- | DEVNOTES.md | 100 | ||||
| -rw-r--r-- | Makefile | 80 | ||||
| -rw-r--r-- | README.md | 44 |
6 files changed, 159 insertions, 155 deletions
diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 0000000..a840b78 --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,27 @@ +version = 1 + +[[analyzers]] +name = "sql" + +[[analyzers]] +name = "terraform" + +[[analyzers]] +name = "python" + + [analyzers.meta] + runtime_version = "3.x.x" + +[[analyzers]] +name = "secrets" + +[[transformers]] +name = "black" + +[[transformers]] +name = "autopep8" + +[[transformers]] +name = "ruff" + + diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 372d0b3..5672048 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -1,19 +1,24 @@ name: deploy-terraform on: + pull_request: + branches: + - main push: branches: - - test-ci/** # Adjust the branch based on our deployment strategy + - main + jobs: deploy-terraform: name: Deploy Terraform runs-on: ubuntu-latest - environment: test-env + #needs: run-checks (must ref on-commit.yml file) + environment: production steps: - name: Checkout Repo uses: actions/checkout@v4 - + - name: Install Terraform uses: hashicorp/setup-terraform@v3 @@ -34,4 +39,4 @@ jobs: - name: Terraform Apply working-directory: terraform - run: terraform apply --auto-approve
\ No newline at end of file + run: terraform apply --auto-approve diff --git a/.github/workflows/on-commit.yml b/.github/workflows/on-commit.yml deleted file mode 100644 index fd9ffb8..0000000 --- a/.github/workflows/on-commit.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: commit-qc-checks - -on: - push: - branches-ignore: - - 'main' - -jobs: - python-quality-checks: - runs-on: ubuntu-latest - steps: - - uses : actions/checkout@v4 - - name : 'Python: Setup' - uses : actions/setup-python@v5 - with: - python-version: 3.11 - - name : 'Python: Install Dependencies' - run: | - python -m pip install --upgrade pip - pip install flake8 pylint black bandit safety - continue-on-error: true - - name : 'Python: Linting' - run: | - flake8 . - find . -name "*.py" | xargs pylint - continue-on-error: true - - name : 'Python: Formatting' - run: | - black --check . - continue-on-error: true - terraform-quality-checks: - runs-on: ubuntu-latest - steps: - - uses : actions/checkout@v4 - - name: 'Terraform: Setup' - uses: hashicorp/setup-terraform@v3 - with: - terraform_version: latest - - name: 'Terraform: Formatting' - working-directory: terraform - run: terraform fmt -check -recursive - continue-on-error: true - - name: 'Terraform: Initialise' - working-directory: terraform - run: terraform init -backend=false - continue-on-error: true - - name: 'Terraform: Validate' - working-directory: terraform - run: terraform validate - continue-on-error: true
\ No newline at end of file diff --git a/DEVNOTES.md b/DEVNOTES.md deleted file mode 100644 index 00b4ddd..0000000 --- a/DEVNOTES.md +++ /dev/null @@ -1,100 +0,0 @@ -# Workflow - -## References - -https://nvie.com/posts/a-successful-git-branching-model/ \ -https://learn.microsoft.com/en-us/azure/devops/repos/git/merging-with-squash?view=azure-devops - - -## Branching - -*Based off GitFlow but slightly modified* - -- There are two main branches - - `main` - production-ready code - - `development` - integration branch for features - - `staging` - represents the current staging state -- In addition, there are additional branches - - Feature branches - for new features and non-urgent bugfixes - - Hotfix branches - probably won't be used but for critical bugs in production (this is what testing should prevent) - - Release branches - for preparation of production releases - -- Feature branches - e.g. `feature/short-description` -- Bugfix branches - e.g. `bugfix/short-description` -- Hotfix branches - e.g. `hotfix/short-description` -- Release branches - e.g. `release/vX.Y.Z` - -### Examples -``` -feature/add-data-extractor -bugfix/fix-s3-upload-error -hotfix/security-patch -release/v1.0.0 -``` - -## Environments - -1. Development - where active development and initial testing occur -2. Staging - for integration testing and final checks before production -3. Production - live and stable environment - -## Deployment - -1. `main` - represents the current production state -2. `develop` - represents the integration branch for features and non-urgent fixes -3. `staging` - represents the current staging state - -## Staging Flow - -1. Create feature branches from `develop` & merge completed features back into `develop` -2. When the `develop` branch is ready for testing, create a `staging` branch from `develop` -3. Deploy the `staging` branch to the staging environment and perform our unit-tests -4. If staging tests pass, create a `release/vX.Y.Z` branch from `staging` -5. Make any final adjustments in the `release/vX.Y.Z` branch -6. Once we have approved the changes in the `release/vX.Y.Z` branch, merge into `main` -7. Tag the release in `main` - -### Notes - -- No new features should be included in the release branches and any new features should be merged into `develop` for the next release cycle - -## Commit Messages - -Please follow the conventional commits specification: - -``` -<type>[optional scope]: <description> - -<optional body> - -[optional footer(s)] -``` - -### Types -- feat: new features -- fix: bugfixes -- docs: documentation-only changes -- style: changes that do not affect the meaning of the code -- refactor: code changes that neither fix bugs nor adds features -- perf: code changes that improve performance -- test: adding tests or correcting existing tests -- chore: changes to build process or tools/libraries (probably not needed) -- infra: changes to infrastructure configuration (e.g. Terraform) - -### Examples -``` -feat(extract): add automatic scheduling for data ingestion -docs: update README with project setup instructions -``` - -Configuration files for things such as Terraform isn't native to Conventional Commits, but we can add our own: - -``` -infra(tf): update S3 bucket policy -``` - -If the Terraform change involves a fix, you may combine `fix` and `infra`: - -``` -fix(infra): ... -``` diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..077cd98 --- /dev/null +++ b/Makefile @@ -0,0 +1,80 @@ +############################################## +# # +# MAKEFILE TO BUILD THE PROJECT # +# # +############################################## + +PROJECT_NAME = de-project-bentley +REGION = eu-west-2 +PYTHON_INTERPRETER = python +WD=$(shell pwd) +PYTHONPATH=${WD} +SHELL := /bin/bash +PROFILE = default +PIP:=pip + +## PYTHON INTERPRETER ENVIRONMENT +create-environment: + @echo ">>> About to create environment: $(PROJECT_NAME)..." + @echo ">>> check python3 version" + ( \ + $(PYTHON_INTERPRETER) --version; \ + ) + @echo ">>> Setting up VirtualEnv." + ( \ + $(PIP) install -q virtualenv virtualenvwrapper; \ + virtualenv venv --python=$(PYTHON_INTERPRETER); \ + ) + +ACTIVATE_ENV := source venv/bin/activate + +# Execute python related functionalities from within the project's environment +define execute_in_env + $(ACTIVATE_ENV) && $1 +endef + +## Build the environment requirements +requirements: create-environment + $(call execute_in_env, $(PIP) install -r ./requirements.txt) + +# Set Up +## Install bandit +bandit: + $(call execute_in_env, $(PIP) install bandit) + +## Install safety +safety: + $(call execute_in_env, $(PIP) install safety) + +## Install black +black: + $(call execute_in_env, $(PIP) install black) + +## Install coverage +coverage: + $(call execute_in_env, $(PIP) install coverage) + +## Set up dev requirements (bandit, safety, black) +dev-setup: bandit safety black coverage + +# Build / Run + +## Run the security test (bandit + safety) +security-test: + $(call execute_in_env, safety check -r ./requirements.txt) + $(call execute_in_env, bandit -lll */*.py *c/*/*.py) + +## Run the black code check +run-black: + $(call execute_in_env, black ./src/*/*.py ./test/*/*.py) + +## Run the unit tests +unit-test: + $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest -v) + +## Run the coverage check +check-coverage: + $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest --cov=src test/) + +## Run all checks +run-checks: security-test run-black unit-test check-coverage @@ -1 +1,43 @@ -# de-project-bentley
\ No newline at end of file +# ToteSys - Data Engineering Project + +# Summary +The project aims to implement a data platform that can extract data from an +operational database, archive it in a data lake, and make it easily accessible +within a remodelled OLAP data warehouse. + +The solution showcases our skills in: + +- Python +- PostgreSQL +- Database modelling +- Amazon Web Services (AWS) +- Agile methodologies + +# Main Objective + +Our goal is to create a reliable ETL (Extract, Transform, Load) pipeline that +can: + +1. Extract the data from the `totesys` operational database +2. Store the data in AWS S3 buckets, that will form our data lake +3. Transform the data into a suitable schema for the data warehouse +4. Load the transformed data into the data warehouse hosted on AWS + +# Key Features + +We aim for the project to have certain features. Some are more prioritised than +others. + +- [ ] Automated data ingestion from `totesys` db +- [ ] Data storage for ingested and processed data in S3 buckets +- [ ] Data transformation for data warehouse schema +- [ ] Automated data loading into the data warehouse schema +- [ ] Logging and monitoring with CloudWatch +- [ ] Notifications for errors and successful runs (e.g. successful ingestion) +- [ ] Visualisation of warehouse data + +# Test Coverage +TBA + +# Contributors +TBA
\ No newline at end of file |
