From 610261fec06ab3b6106465960d6935dd9df85df0 Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Fri, 16 Aug 2024 09:46:53 +0100 Subject: Secrets manager integration into the extract lambda reviewed. --- src/extract_lambda.py | 29 +++++++++-------- tests/test_secrets_manager.py | 73 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 13 deletions(-) create mode 100644 tests/test_secrets_manager.py diff --git a/src/extract_lambda.py b/src/extract_lambda.py index fb2d7e8..3055f63 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -1,5 +1,4 @@ -from pg8000.native import Connection, DatabaseError, InterfaceError -from dotenv import dotenv_values +from pg8000.native import Connection, InterfaceError import boto3 import csv from botocore.exceptions import ClientError @@ -42,31 +41,35 @@ def lambda_handler(event, context): 'statusCode': 200, 'body': json.dumps('CSV files processed and uploaded successfully.') } - except Exception as e: logger.error(f'Error: {e}') return { 'statusCode': 500, 'body': json.dumps('Internal server error.') } - finally: - if db: db.close() -def get_config(path: str = ".env") -> dict: - return dotenv_values(path) +def retrieve_secrets(sm_client=boto3.client('secretsmanager'), secret_name='bentley-secrets'): + try: + response = sm_client.get_secret_value(SecretId=secret_name) + if 'SecretString' in response: + secret = json.loads(response['SecretString']) + return secret + except ClientError as e: + logger.error(f'Could not retrieve secrets: {e}') + raise e def connect_to_database() -> Connection: try: - config = get_config() - host = config["host"] - port = config["port"] - user = config["user"] - password = config["password"] - database = config["database"] + secrets = retrieve_secrets() + host = secrets["host"] + port = secrets["port"] + user = secrets["user"] + password = secrets["password"] + database = secrets["database"] return Connection( database=database, diff --git a/tests/test_secrets_manager.py b/tests/test_secrets_manager.py new file mode 100644 index 0000000..a30be86 --- /dev/null +++ b/tests/test_secrets_manager.py @@ -0,0 +1,73 @@ +from src.secrets_manager import sm_client, retrieve_secrets +import boto3 +import botocore.exceptions +from moto import mock_aws +import json +import pytest +import os + +@pytest.fixture(scope='function') +def aws_credentials(): + """Mocked AWS Credentials for moto.""" + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURITY_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" + +@pytest.fixture(scope='function') +def mock_sm_client(aws_credentials): + with mock_aws(): + yield boto3.client("secretsmanager") + +@pytest.fixture(scope='function') +def mock_store_secret(mock_sm_client): + secret = { + "cohort_id": "test_cohort_id", + "user": "test_user_id", + "password": "test_password", + "host": "test_host", + "database": "test_database", + "port": "test_port" + } + + secret_name = "test_secret" + + response = mock_sm_client.create_secret(Name=secret_name, SecretString=json.dumps(secret)) + + return response + +def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret): + secret_name = "test_secret" + + result = retrieve_secrets(mock_sm_client, secret_name) + + assert isinstance(result, dict) + +def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_store_secret): + + secret_name = "test_secret" + + result = retrieve_secrets(mock_sm_client, secret_name) + + assert result["cohort_id"] == "test_cohort_id" + assert result["user"] == "test_user_id" + assert result["password"] == "test_password" + assert result["host"] == "test_host" + assert result["database"] == "test_database" + assert result["port"] == "test_port" + +def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type(mock_sm_client): + secret_name = [1, 2, 3] + + + with pytest.raises(botocore.exceptions.ParamValidationError) as error: + retrieve_secrets(mock_sm_client, secret_name) + + +def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist(mock_sm_client, mock_store_secret): + secret_name = 'test_secret_2' + + + with pytest.raises(botocore.exceptions.ClientError) as error: + retrieve_secrets(mock_sm_client, secret_name) \ No newline at end of file -- cgit v1.2.3 From 938ddda10ff2f7d5360ca0a939fa2f16d6beb09d Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Fri, 16 Aug 2024 10:01:06 +0100 Subject: extract bucket name retrieval helper function and replace the bucket name placeholders --- src/extract_lambda.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 3055f63..f4c0c1d 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -82,9 +82,12 @@ def connect_to_database() -> Connection: logger.error(f'Interface error: {i}') raise DBConnectionException("Failed to connect to database") +def extract_bucket(client=boto3.client('s3')): + response = client.list_buckets() + extract_bucket_filter = [bucket['Name'] for bucket in response['Buckets'] if 'extract' in bucket['Name']] + return extract_bucket_filter[0] - -def list_existing_s3_files(bucket_name='extract_bucket', client=boto3.client('s3')): +def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client('s3')): """Creates a dictionary and populates it with the results of listing the contents of the s3 bucket, then returns the populated dictionary @@ -93,7 +96,7 @@ def list_existing_s3_files(bucket_name='extract_bucket', client=boto3.client('s3 existing_files = {} try: - response = client.list_objects_v2(Bucket='extract_bucket') + response = client.list_objects_v2(Bucket=bucket_name) if 'Contents' in response: for obj in response['Contents']: @@ -150,7 +153,7 @@ def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): ## END OF NEW CODE if existing_files[latest_s3_object_key] != new_csv_content: try: - client.upload_file(csv_file_path, 'extract_bucket', s3_key) + client.upload_file(csv_file_path, extract_bucket(), s3_key) logger.info(f"Uploaded {s3_key} to S3.") except ClientError as e: logger.error(f'Error uploading to S3: {e}') -- cgit v1.2.3 From c937a7e098d818dadbc769b3c9eb9fd93cc05af2 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 10:01:28 +0100 Subject: docs: rm DEVNOTES.md basically redundant now --- DEVNOTES.md | 100 ------------------------------------------------------------ 1 file changed, 100 deletions(-) delete mode 100644 DEVNOTES.md diff --git a/DEVNOTES.md b/DEVNOTES.md deleted file mode 100644 index 00b4ddd..0000000 --- a/DEVNOTES.md +++ /dev/null @@ -1,100 +0,0 @@ -# Workflow - -## References - -https://nvie.com/posts/a-successful-git-branching-model/ \ -https://learn.microsoft.com/en-us/azure/devops/repos/git/merging-with-squash?view=azure-devops - - -## Branching - -*Based off GitFlow but slightly modified* - -- There are two main branches - - `main` - production-ready code - - `development` - integration branch for features - - `staging` - represents the current staging state -- In addition, there are additional branches - - Feature branches - for new features and non-urgent bugfixes - - Hotfix branches - probably won't be used but for critical bugs in production (this is what testing should prevent) - - Release branches - for preparation of production releases - -- Feature branches - e.g. `feature/short-description` -- Bugfix branches - e.g. `bugfix/short-description` -- Hotfix branches - e.g. `hotfix/short-description` -- Release branches - e.g. `release/vX.Y.Z` - -### Examples -``` -feature/add-data-extractor -bugfix/fix-s3-upload-error -hotfix/security-patch -release/v1.0.0 -``` - -## Environments - -1. Development - where active development and initial testing occur -2. Staging - for integration testing and final checks before production -3. Production - live and stable environment - -## Deployment - -1. `main` - represents the current production state -2. `develop` - represents the integration branch for features and non-urgent fixes -3. `staging` - represents the current staging state - -## Staging Flow - -1. Create feature branches from `develop` & merge completed features back into `develop` -2. When the `develop` branch is ready for testing, create a `staging` branch from `develop` -3. Deploy the `staging` branch to the staging environment and perform our unit-tests -4. If staging tests pass, create a `release/vX.Y.Z` branch from `staging` -5. Make any final adjustments in the `release/vX.Y.Z` branch -6. Once we have approved the changes in the `release/vX.Y.Z` branch, merge into `main` -7. Tag the release in `main` - -### Notes - -- No new features should be included in the release branches and any new features should be merged into `develop` for the next release cycle - -## Commit Messages - -Please follow the conventional commits specification: - -``` -[optional scope]: - - - -[optional footer(s)] -``` - -### Types -- feat: new features -- fix: bugfixes -- docs: documentation-only changes -- style: changes that do not affect the meaning of the code -- refactor: code changes that neither fix bugs nor adds features -- perf: code changes that improve performance -- test: adding tests or correcting existing tests -- chore: changes to build process or tools/libraries (probably not needed) -- infra: changes to infrastructure configuration (e.g. Terraform) - -### Examples -``` -feat(extract): add automatic scheduling for data ingestion -docs: update README with project setup instructions -``` - -Configuration files for things such as Terraform isn't native to Conventional Commits, but we can add our own: - -``` -infra(tf): update S3 bucket policy -``` - -If the Terraform change involves a fix, you may combine `fix` and `infra`: - -``` -fix(infra): ... -``` -- cgit v1.2.3 From 861fd5fe8303c6558c7763477c89dc98fff23c57 Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Fri, 16 Aug 2024 10:20:14 +0100 Subject: wip: pushing the ci-cd-branch to test terraform infra --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 372d0b3..922daee 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -3,7 +3,7 @@ name: deploy-terraform on: push: branches: - - test-ci/** # Adjust the branch based on our deployment strategy + - ci-cd-branch # Adjust the branch based on our deployment strategy jobs: deploy-terraform: -- cgit v1.2.3 From d25f05ba140cb85847ca604bef0e68b76a17ba62 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 10:34:50 +0100 Subject: docs: add draft summary section --- README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8ae0cb3..203482e 100644 --- a/README.md +++ b/README.md @@ -1 +1,14 @@ -# de-project-bentley \ No newline at end of file +# ToteSys - Data Engineering Project + +# Summary +The project aims to implement a data platform that can extract data from an +operational database, archive it in a data lake, and make it easily accessible +within a remodelled OLAP data warehouse. + +The solution showcases our skills in: + +- Python +- PostgreSQL +- Database modelling +- Amazon Web Services (AWS) +- Agile methodologies \ No newline at end of file -- cgit v1.2.3 From 9809e7ca1351d7b27f62b3c7c74db7124cab5dc9 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 10:40:00 +0100 Subject: docs: add draft main objective section --- README.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 203482e..e55cb16 100644 --- a/README.md +++ b/README.md @@ -11,4 +11,14 @@ The solution showcases our skills in: - PostgreSQL - Database modelling - Amazon Web Services (AWS) -- Agile methodologies \ No newline at end of file +- Agile methodologies + +# Main Objective + +Our goal is to create a reliable ETL (Extract, Transform, Load) pipeline that +can: + +1. Extract the data from the `totesys` operational database +2. Store the data in AWS S3 buckets, that will form our data lake +3. Transform the data into a suitable schema for the data warehouse +4. Load the data into the data warehouse hosted on AWS \ No newline at end of file -- cgit v1.2.3 From 37eb3bb7974904614867c7d0c2d4f6eccb39f22e Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 10:41:01 +0100 Subject: docs(main_obj): clarify data being loaded into data warehouse --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e55cb16..9c7baee 100644 --- a/README.md +++ b/README.md @@ -21,4 +21,4 @@ can: 1. Extract the data from the `totesys` operational database 2. Store the data in AWS S3 buckets, that will form our data lake 3. Transform the data into a suitable schema for the data warehouse -4. Load the data into the data warehouse hosted on AWS \ No newline at end of file +4. Load the transformed data into the data warehouse hosted on AWS \ No newline at end of file -- cgit v1.2.3 From 0c42e8f165e0f98a6c16252e841432922467ef94 Mon Sep 17 00:00:00 2001 From: Ellie Date: Fri, 16 Aug 2024 10:51:16 +0100 Subject: add lambda layer --- terraform/lambda.tf | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 72d1306..658b8c8 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -81,3 +81,38 @@ resource "aws_lambda_function" "load_lambda" { depends_on = [aws_s3_object.load_lambda_code] } + +locals { + layer_dir = "${path.module}/../python" + requirements = "${path.module}/../requirements.txt" + layer_zip = "${path.module}/../layer.zip" +} + +resource "null_resource" "prepare_layer" { + triggers = { + requirements_hash = filesha1(local.requirements) + } + provisioner "local-exec" { + command = < Date: Fri, 16 Aug 2024 11:09:59 +0100 Subject: docs: add draft key features section --- README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9c7baee..0bf6b9d 100644 --- a/README.md +++ b/README.md @@ -21,4 +21,17 @@ can: 1. Extract the data from the `totesys` operational database 2. Store the data in AWS S3 buckets, that will form our data lake 3. Transform the data into a suitable schema for the data warehouse -4. Load the transformed data into the data warehouse hosted on AWS \ No newline at end of file +4. Load the transformed data into the data warehouse hosted on AWS + +# Key Features + +We aim for the project to have certain features. Some are more prioritised than +others. + +- [ ] Automated data ingestion from `totesys` db +- [ ] Data storage for ingested and processed data in S3 buckets +- [ ] Data transformation for data warehouse schema +- [ ] Automated data loading into the data warehouse schema +- [ ] Logging and monitoring with CloudWatch +- [ ] Notifications for errors and successful runs (e.g. successful ingestion) +- [ ] Visualisation of warehouse data \ No newline at end of file -- cgit v1.2.3 From 24dd35f4bc6a0b8934f09b320f73bc88c6f68f1f Mon Sep 17 00:00:00 2001 From: Ellie Date: Fri, 16 Aug 2024 12:19:54 +0100 Subject: comment out rds.tf to increases tf speed --- terraform/rds.tf | 138 +++++++++++++++++++++++++++---------------------------- 1 file changed, 69 insertions(+), 69 deletions(-) diff --git a/terraform/rds.tf b/terraform/rds.tf index 88783b7..d1b4959 100644 --- a/terraform/rds.tf +++ b/terraform/rds.tf @@ -1,80 +1,80 @@ -data "aws_availability_zones" "available" {} +# data "aws_availability_zones" "available" {} -module "vpc" { - source = "terraform-aws-modules/vpc/aws" - version = "5.12.1" +# module "vpc" { +# source = "terraform-aws-modules/vpc/aws" +# version = "5.12.1" - name = var.project_name - cidr = "10.0.0.0/16" - azs = data.aws_availability_zones.available.names - public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"] - enable_dns_hostnames = true - enable_dns_support = true -} +# name = var.project_name +# cidr = "10.0.0.0/16" +# azs = data.aws_availability_zones.available.names +# public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"] +# enable_dns_hostnames = true +# enable_dns_support = true +# } -resource "aws_db_subnet_group" "Terrific-Totes-sub-gr" { - name = "tt-db-subnet" - subnet_ids = module.vpc.public_subnets +# resource "aws_db_subnet_group" "Terrific-Totes-sub-gr" { +# name = "tt-db-subnet" +# subnet_ids = module.vpc.public_subnets - tags = { - Name = "${var.project_name}" - } -} +# tags = { +# Name = "${var.project_name}" +# } +# } -resource "aws_security_group" "rds" { - name = "${var.project_name}-rds" - vpc_id = module.vpc.vpc_id +# resource "aws_security_group" "rds" { +# name = "${var.project_name}-rds" +# vpc_id = module.vpc.vpc_id - ingress { - from_port = 5432 - to_port = 5432 - protocol = "tcp" - cidr_blocks = ["0.0.0.0/0"] - } +# ingress { +# from_port = 5432 +# to_port = 5432 +# protocol = "tcp" +# cidr_blocks = ["0.0.0.0/0"] +# } - egress { - from_port = 5432 - to_port = 5432 - protocol = "tcp" - cidr_blocks = ["0.0.0.0/0"] - } +# egress { +# from_port = 5432 +# to_port = 5432 +# protocol = "tcp" +# cidr_blocks = ["0.0.0.0/0"] +# } - tags = { - Name = "${var.project_name}-rds" - } -} +# tags = { +# Name = "${var.project_name}-rds" +# } +# } -resource "aws_db_parameter_group" "Terrific-Totes-param-gr" { - name = "tt-db-param" - family = "postgres14" +# resource "aws_db_parameter_group" "Terrific-Totes-param-gr" { +# name = "tt-db-param" +# family = "postgres14" - parameter { - name = "log_connections" - value = "1" - } -} +# parameter { +# name = "log_connections" +# value = "1" +# } +# } -resource "aws_db_instance" "terrific-totes-rds" { - db_name = var.project_name - instance_class = "db.t3.micro" - allocated_storage = 5 - engine = "postgres" - engine_version = "14.10" - username = "totes" - password = "totes123" - # username = "user credentials for the root user" # we could use .env here - # password = "user password for the root user" # we could use .env here - ### alternatively to providing username nad password we can specify: - # resource "aws_kms_key" "example_key" { - # description = "Example KMS Key" - # } - # within the resource: - # manage_master_user_password = true - # master_user_secret_kms_key_id = aws_kms_key.example.key_id - # } - db_subnet_group_name = aws_db_subnet_group.Terrific-Totes-sub-gr.name - vpc_security_group_ids = [aws_security_group.rds.id] - parameter_group_name = aws_db_parameter_group.Terrific-Totes-param-gr.name - publicly_accessible = false - skip_final_snapshot = true -} +# resource "aws_db_instance" "terrific-totes-rds" { +# db_name = var.project_name +# instance_class = "db.t3.micro" +# allocated_storage = 5 +# engine = "postgres" +# engine_version = "14.10" +# username = "totes" +# password = "totes123" +# # username = "user credentials for the root user" # we could use .env here +# # password = "user password for the root user" # we could use .env here +# ### alternatively to providing username nad password we can specify: +# # resource "aws_kms_key" "example_key" { +# # description = "Example KMS Key" +# # } +# # within the resource: +# # manage_master_user_password = true +# # master_user_secret_kms_key_id = aws_kms_key.example.key_id +# # } +# db_subnet_group_name = aws_db_subnet_group.Terrific-Totes-sub-gr.name +# vpc_security_group_ids = [aws_security_group.rds.id] +# parameter_group_name = aws_db_parameter_group.Terrific-Totes-param-gr.name +# publicly_accessible = false +# skip_final_snapshot = true +# } -- cgit v1.2.3 From 3d56751d93eeb5ef6cef1f44dd54ee38fcd1fe3c Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Fri, 16 Aug 2024 12:20:54 +0100 Subject: wip: change env line 14 to production --- .github/workflows/deploy.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 922daee..bd9df57 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -5,15 +5,17 @@ on: branches: - ci-cd-branch # Adjust the branch based on our deployment strategy + jobs: deploy-terraform: name: Deploy Terraform runs-on: ubuntu-latest - environment: test-env + #needs: run-checks (must ref on-commit.yml file) + environment: production steps: - name: Checkout Repo uses: actions/checkout@v4 - + - name: Install Terraform uses: hashicorp/setup-terraform@v3 -- cgit v1.2.3 From 9ad481989e7033735815df7c2fe7a277433587a6 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 16 Aug 2024 12:36:46 +0100 Subject: ci: create .deepsource.toml for automated commit checking --- .deepsource.toml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .deepsource.toml diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 0000000..a5002ab --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,25 @@ +version = 1 + +[[analyzers]] +name = "sql" + +[[analyzers]] +name = "terraform" + +[[analyzers]] +name = "python" + + [analyzers.meta] + runtime_version = "3.x.x" + +[[analyzers]] +name = "secrets" + +[[transformers]] +name = "black" + +[[transformers]] +name = "autopep8" + +[[transformers]] +name = "ruff" -- cgit v1.2.3 From ba82306f1646215f17b55099fd6342bca2a35c97 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 16 Aug 2024 12:42:48 +0100 Subject: ci: update .deepsource.toml --- .deepsource.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.deepsource.toml b/.deepsource.toml index a5002ab..b435c8b 100644 --- a/.deepsource.toml +++ b/.deepsource.toml @@ -17,9 +17,14 @@ name = "secrets" [[transformers]] name = "black" +enabled = "true" [[transformers]] name = "autopep8" +enabled = "true" [[transformers]] name = "ruff" +enabled = "true" + + -- cgit v1.2.3 From b3aa2d97e4844b6c205fc9b85427f8d5f150388a Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 16 Aug 2024 12:48:06 +0100 Subject: fix(ci): fix .deepsource.toml config --- .deepsource.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.deepsource.toml b/.deepsource.toml index b435c8b..a840b78 100644 --- a/.deepsource.toml +++ b/.deepsource.toml @@ -17,14 +17,11 @@ name = "secrets" [[transformers]] name = "black" -enabled = "true" [[transformers]] name = "autopep8" -enabled = "true" [[transformers]] name = "ruff" -enabled = "true" -- cgit v1.2.3 From a217da60ba75a226bf72a9fc680c4cbabe883aea Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 12:53:22 +0100 Subject: docs: add empty sections --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0bf6b9d..6bc75dc 100644 --- a/README.md +++ b/README.md @@ -34,4 +34,10 @@ others. - [ ] Automated data loading into the data warehouse schema - [ ] Logging and monitoring with CloudWatch - [ ] Notifications for errors and successful runs (e.g. successful ingestion) -- [ ] Visualisation of warehouse data \ No newline at end of file +- [ ] Visualisation of warehouse data + +# Test Coverage +TBA + +# Contributors +TBA \ No newline at end of file -- cgit v1.2.3 From dd68d948dec97fedfcaa89806523975ad1224c71 Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Fri, 16 Aug 2024 13:48:22 +0100 Subject: refactoring for extract lambda to filter by last updated and if not empty write it s3 --- .gitignore | 2 ++ src/extract_lambda.py | 26 +++++++++++--------------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index ca15434..bceab93 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ __pycache__/ # OS-Related Files .DS_Store + +*venv* diff --git a/src/extract_lambda.py b/src/extract_lambda.py index f4c0c1d..e348bef 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -136,9 +136,9 @@ def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): print(tables) for table in tables: table_name = table[0] - rows = db.run(f"SELECT * FROM {table_name};") - + rows = db.run(f"SELECT * FROM {table_name} WHERE last_updated >= {datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')};") + if rows: csv_file_path = f"/tmp/{table_name}.csv" with open(csv_file_path, "w", newline='') as file: writer = csv.writer(file) @@ -147,16 +147,12 @@ def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): writer.writerow(column_names) writer.writerows(rows) s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') - new_csv_content = open(csv_file_path, "r").read() - ## NEW CODE - latest_s3_object_key = datetime.strftime(latest_timestamp,f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') - ## END OF NEW CODE - if existing_files[latest_s3_object_key] != new_csv_content: - try: - client.upload_file(csv_file_path, extract_bucket(), s3_key) - logger.info(f"Uploaded {s3_key} to S3.") - except ClientError as e: - logger.error(f'Error uploading to S3: {e}') - else: - logger.info(f"No new data.") - \ No newline at end of file + + try: + client.upload_file(csv_file_path, extract_bucket(), s3_key) + logger.info(f"Uploaded {s3_key} to S3.") + except ClientError as e: + logger.error(f'Error uploading to S3: {e}') + else: + logger.info(f"No new data.") + \ No newline at end of file -- cgit v1.2.3 From c284df39ed7735d736f4fe0f2571ba846b8f6315 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Fri, 16 Aug 2024 12:51:02 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in dd68d94 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/47 --- src/extract_lambda.py | 130 +++++++++++++++++++++++++++----------------------- 1 file changed, 71 insertions(+), 59 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index e348bef..323d04a 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -20,48 +20,49 @@ class DBConnectionException(Exception): self.message = str(e) super().__init__(self.message) + def lambda_handler(event, context): """This lambda function connects to the Totesys database, lists the contents of the ingestion bucket, - and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them - it uses 3 helper functions to achieve these 3 functionalities + and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them + it uses 3 helper functions to achieve these 3 functionalities """ try: db = connect_to_database() existing_files = list_existing_s3_files() any_changes = process_and_upload_tables(db, existing_files) - + if not any_changes: logger.info("No changes detected in the database.") return { - 'statusCode': 200, - 'body': json.dumps('No changes detected, no CSV files were uploaded.') + "statusCode": 200, + "body": json.dumps("No changes detected, no CSV files were uploaded."), } else: return { - 'statusCode': 200, - 'body': json.dumps('CSV files processed and uploaded successfully.') + "statusCode": 200, + "body": json.dumps("CSV files processed and uploaded successfully."), } except Exception as e: - logger.error(f'Error: {e}') - return { - 'statusCode': 500, - 'body': json.dumps('Internal server error.') - } + logger.error(f"Error: {e}") + return {"statusCode": 500, "body": json.dumps("Internal server error.")} finally: if db: db.close() -def retrieve_secrets(sm_client=boto3.client('secretsmanager'), secret_name='bentley-secrets'): +def retrieve_secrets( + sm_client=boto3.client("secretsmanager"), secret_name="bentley-secrets" +): try: response = sm_client.get_secret_value(SecretId=secret_name) - if 'SecretString' in response: - secret = json.loads(response['SecretString']) + if "SecretString" in response: + secret = json.loads(response["SecretString"]) return secret except ClientError as e: - logger.error(f'Could not retrieve secrets: {e}') + logger.error(f"Could not retrieve secrets: {e}") raise e + def connect_to_database() -> Connection: try: secrets = retrieve_secrets() @@ -72,87 +73,98 @@ def connect_to_database() -> Connection: database = secrets["database"] return Connection( - database=database, - user=user, - password=password, - host=host, - port=port + database=database, user=user, password=password, host=host, port=port ) except InterfaceError as i: - logger.error(f'Interface error: {i}') + logger.error(f"Interface error: {i}") raise DBConnectionException("Failed to connect to database") -def extract_bucket(client=boto3.client('s3')): + +def extract_bucket(client=boto3.client("s3")): response = client.list_buckets() - extract_bucket_filter = [bucket['Name'] for bucket in response['Buckets'] if 'extract' in bucket['Name']] + extract_bucket_filter = [ + bucket["Name"] for bucket in response["Buckets"] if "extract" in bucket["Name"] + ] return extract_bucket_filter[0] -def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client('s3')): - """Creates a dictionary and populates it with the - results of listing the contents of the s3 bucket, then - returns the populated dictionary + +def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client("s3")): + """Creates a dictionary and populates it with the + results of listing the contents of the s3 bucket, then + returns the populated dictionary """ - + existing_files = {} - + try: response = client.list_objects_v2(Bucket=bucket_name) - - if 'Contents' in response: - for obj in response['Contents']: - s3_key = obj['Key'] + + if "Contents" in response: + for obj in response["Contents"]: + s3_key = obj["Key"] try: file_obj = client.get_object(Bucket=bucket_name, Key=s3_key) - file_content = file_obj['Body'].read().decode('utf-8') + file_content = file_obj["Body"].read().decode("utf-8") existing_files[s3_key] = file_content except ClientError as e: - logger.error(f'Error retrieving S3 object {s3_key}: {e}') + logger.error(f"Error retrieving S3 object {s3_key}: {e}") else: - logger.error('The bucket is empty') - + logger.error("The bucket is empty") + except ClientError as e: - logger.error(f'Error listing S3 objects: {e}') - - return existing_files + logger.error(f"Error listing S3 objects: {e}") + return existing_files -def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): - """Creates a list of the tables from a database query and - then selects everything from each table in individual queries - it then writes each table to CSV files and compares with the item - in the existing_files dictionary with the same name. If it finds any changes - to files, or new tables/files it uploads them to the s3 bucket +def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): + """Creates a list of the tables from a database query and + then selects everything from each table in individual queries + it then writes each table to CSV files and compares with the item + in the existing_files dictionary with the same name. If it finds any changes + to files, or new tables/files it uploads them to the s3 bucket """ - ## NEW CODE + # NEW CODE all_datetimes = [] for file_names in existing_files.keys(): - datetime_str_on_s3 = ''.join(re.search(r'\/(.+/).+_(.+)\.csv',file_names).group(1,2)) - all_datetimes.append(datetime.strptime(datetime_str_on_s3, '%Y/%m/%d/%H:%M:%S')) + datetime_str_on_s3 = "".join( + re.search(r"\/(.+/).+_(.+)\.csv", file_names).group(1, 2) + ) + all_datetimes.append(datetime.strptime(datetime_str_on_s3, "%Y/%m/%d/%H:%M:%S")) latest_timestamp = max(all_datetimes) - ## END OF NEW CODE + # END OF NEW CODE - tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';") + tables = db.run( + "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';" + ) print(tables) for table in tables: table_name = table[0] - rows = db.run(f"SELECT * FROM {table_name} WHERE last_updated >= {datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')};") + rows = db.run( + f"SELECT * FROM {table_name} WHERE last_updated >= {datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')};" + ) if rows: csv_file_path = f"/tmp/{table_name}.csv" - with open(csv_file_path, "w", newline='') as file: + with open(csv_file_path, "w", newline="") as file: writer = csv.writer(file) - #column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] - column_names = [col_name[0] for col_name in db.run(f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';")] + # column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] + column_names = [ + col_name[0] + for col_name in db.run( + f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';" + ) + ] writer.writerow(column_names) writer.writerows(rows) - s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') + s3_key = datetime.strftime( + datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv" + ) try: client.upload_file(csv_file_path, extract_bucket(), s3_key) logger.info(f"Uploaded {s3_key} to S3.") except ClientError as e: - logger.error(f'Error uploading to S3: {e}') + logger.error(f"Error uploading to S3: {e}") else: logger.info(f"No new data.") - \ No newline at end of file -- cgit v1.2.3 From e97ab6b46f181db107b7a640f386f5f57480347c Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Fri, 16 Aug 2024 14:16:03 +0100 Subject: add makefile in root: not in use currently --- Makefile | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..077cd98 --- /dev/null +++ b/Makefile @@ -0,0 +1,80 @@ +############################################## +# # +# MAKEFILE TO BUILD THE PROJECT # +# # +############################################## + +PROJECT_NAME = de-project-bentley +REGION = eu-west-2 +PYTHON_INTERPRETER = python +WD=$(shell pwd) +PYTHONPATH=${WD} +SHELL := /bin/bash +PROFILE = default +PIP:=pip + +## PYTHON INTERPRETER ENVIRONMENT +create-environment: + @echo ">>> About to create environment: $(PROJECT_NAME)..." + @echo ">>> check python3 version" + ( \ + $(PYTHON_INTERPRETER) --version; \ + ) + @echo ">>> Setting up VirtualEnv." + ( \ + $(PIP) install -q virtualenv virtualenvwrapper; \ + virtualenv venv --python=$(PYTHON_INTERPRETER); \ + ) + +ACTIVATE_ENV := source venv/bin/activate + +# Execute python related functionalities from within the project's environment +define execute_in_env + $(ACTIVATE_ENV) && $1 +endef + +## Build the environment requirements +requirements: create-environment + $(call execute_in_env, $(PIP) install -r ./requirements.txt) + +# Set Up +## Install bandit +bandit: + $(call execute_in_env, $(PIP) install bandit) + +## Install safety +safety: + $(call execute_in_env, $(PIP) install safety) + +## Install black +black: + $(call execute_in_env, $(PIP) install black) + +## Install coverage +coverage: + $(call execute_in_env, $(PIP) install coverage) + +## Set up dev requirements (bandit, safety, black) +dev-setup: bandit safety black coverage + +# Build / Run + +## Run the security test (bandit + safety) +security-test: + $(call execute_in_env, safety check -r ./requirements.txt) + $(call execute_in_env, bandit -lll */*.py *c/*/*.py) + +## Run the black code check +run-black: + $(call execute_in_env, black ./src/*/*.py ./test/*/*.py) + +## Run the unit tests +unit-test: + $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest -v) + +## Run the coverage check +check-coverage: + $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest --cov=src test/) + +## Run all checks +run-checks: security-test run-black unit-test check-coverage -- cgit v1.2.3 From 2bcedc300f36760b55f0db8cfb4e724362d1c251 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 14:27:41 +0100 Subject: chore(ci): remove redundant on-commit.yml --- .github/workflows/on-commit.yml | 50 ----------------------------------------- 1 file changed, 50 deletions(-) delete mode 100644 .github/workflows/on-commit.yml diff --git a/.github/workflows/on-commit.yml b/.github/workflows/on-commit.yml deleted file mode 100644 index fd9ffb8..0000000 --- a/.github/workflows/on-commit.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: commit-qc-checks - -on: - push: - branches-ignore: - - 'main' - -jobs: - python-quality-checks: - runs-on: ubuntu-latest - steps: - - uses : actions/checkout@v4 - - name : 'Python: Setup' - uses : actions/setup-python@v5 - with: - python-version: 3.11 - - name : 'Python: Install Dependencies' - run: | - python -m pip install --upgrade pip - pip install flake8 pylint black bandit safety - continue-on-error: true - - name : 'Python: Linting' - run: | - flake8 . - find . -name "*.py" | xargs pylint - continue-on-error: true - - name : 'Python: Formatting' - run: | - black --check . - continue-on-error: true - terraform-quality-checks: - runs-on: ubuntu-latest - steps: - - uses : actions/checkout@v4 - - name: 'Terraform: Setup' - uses: hashicorp/setup-terraform@v3 - with: - terraform_version: latest - - name: 'Terraform: Formatting' - working-directory: terraform - run: terraform fmt -check -recursive - continue-on-error: true - - name: 'Terraform: Initialise' - working-directory: terraform - run: terraform init -backend=false - continue-on-error: true - - name: 'Terraform: Validate' - working-directory: terraform - run: terraform validate - continue-on-error: true \ No newline at end of file -- cgit v1.2.3 From cf3d366e730e88ceea194d5b3b1d1a3ddecdd944 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 14:30:07 +0100 Subject: ci: deploy only on push/pr to main --- .github/workflows/deploy.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index bd9df57..db51d20 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -1,10 +1,13 @@ name: deploy-terraform on: - push: + pull-request: branches: - - ci-cd-branch # Adjust the branch based on our deployment strategy - + - main + pull: + branches: + - main + jobs: deploy-terraform: @@ -36,4 +39,4 @@ jobs: - name: Terraform Apply working-directory: terraform - run: terraform apply --auto-approve \ No newline at end of file + run: terraform apply --auto-approve -- cgit v1.2.3 From 63b5f3e5f1888d5653d2f7b3529b3d72e3315dbf Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 14:43:46 +0100 Subject: fix(ci): amend pull_request syntax --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index db51d20..00c7263 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -1,7 +1,7 @@ name: deploy-terraform on: - pull-request: + pull_request: branches: - main pull: -- cgit v1.2.3 From 9cec304b2f8c2832c4a715bba784a34f7c674c19 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 14:52:35 +0100 Subject: fix(ci): amend pull to push --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 00c7263..5672048 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -4,7 +4,7 @@ on: pull_request: branches: - main - pull: + push: branches: - main -- cgit v1.2.3 From aba65e0db08625c1ef0d3db6076b54e56e0b45ea Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Fri, 16 Aug 2024 14:20:39 +0100 Subject: refactor following github actions major risk message --- src/extract_lambda.py | 65 ++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 323d04a..cc09e87 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -30,8 +30,8 @@ def lambda_handler(event, context): db = connect_to_database() existing_files = list_existing_s3_files() any_changes = process_and_upload_tables(db, existing_files) - - if not any_changes: + + if not any_changes['updated']: logger.info("No changes detected in the database.") return { "statusCode": 200, @@ -39,8 +39,9 @@ def lambda_handler(event, context): } else: return { - "statusCode": 200, - "body": json.dumps("CSV files processed and uploaded successfully."), + 'statusCode': 200, + 'body': json.dumps(f"""CSV files processed for {', '.join(any_changes['updated'])} and uploaded successfully.{ + 'The following tables were not updated: '+', '.join(any_changes['no change']) if any_changes['no change'] else ''}""") } except Exception as e: logger.error(f"Error: {e}") @@ -124,7 +125,8 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): in the existing_files dictionary with the same name. If it finds any changes to files, or new tables/files it uploads them to the s3 bucket """ - # NEW CODE + load_status = {'updated':[],'no change':[]} + ## Retrieving the latest file timestamp from S3 extract bucket all_datetimes = [] for file_names in existing_files.keys(): datetime_str_on_s3 = "".join( @@ -132,39 +134,34 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): ) all_datetimes.append(datetime.strptime(datetime_str_on_s3, "%Y/%m/%d/%H:%M:%S")) latest_timestamp = max(all_datetimes) - # END OF NEW CODE - tables = db.run( - "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';" - ) - print(tables) + ## Iterating through tables on the database and retrieving only latest changes vs previous file load + tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';") for table in tables: table_name = table[0] rows = db.run( f"SELECT * FROM {table_name} WHERE last_updated >= {datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')};" ) - if rows: - csv_file_path = f"/tmp/{table_name}.csv" - with open(csv_file_path, "w", newline="") as file: - writer = csv.writer(file) - # column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] - column_names = [ - col_name[0] - for col_name in db.run( - f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';" - ) - ] - writer.writerow(column_names) - writer.writerows(rows) - s3_key = datetime.strftime( - datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv" - ) - - try: - client.upload_file(csv_file_path, extract_bucket(), s3_key) - logger.info(f"Uploaded {s3_key} to S3.") - except ClientError as e: - logger.error(f"Error uploading to S3: {e}") - else: - logger.info(f"No new data.") + ## Creating a temporary file path and writing the column name to it followed by each row of data + if rows: + csv_file_path = f"/tmp/{table_name}.csv" + with open(csv_file_path, "w", newline='') as file: + writer = csv.writer(file) + #column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] + column_names = [col_name[0] for col_name in db.run(f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';")] + writer.writerow(column_names) + writer.writerows(rows) + s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') + + ## Writing the new file to S3 extract bucket: + try: + client.upload_file(csv_file_path, extract_bucket(), s3_key) + load_status['updated'].append(table_name) + logger.info(f"Uploaded {s3_key} to S3.") + except ClientError as e: + logger.error(f'Error uploading to S3: {e}') + else: + load_status['no change'].append(table_name) + logger.info(f"No new data in {table_name} name. Latest data retrieved is from {latest_timestamp}.") + return load_status -- cgit v1.2.3 From 4428b8d9e8903e93ca2efd9f95cea9205bf303a9 Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Fri, 16 Aug 2024 14:42:15 +0100 Subject: refactoring to be more in line with pythonic code practices and prevent sql injection --- src/extract_lambda.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index cc09e87..d1a5c7c 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -1,4 +1,4 @@ -from pg8000.native import Connection, InterfaceError +from pg8000.native import Connection, InterfaceError, identifier import boto3 import csv from botocore.exceptions import ClientError @@ -136,12 +136,15 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): latest_timestamp = max(all_datetimes) ## Iterating through tables on the database and retrieving only latest changes vs previous file load - tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';") + tables = db.run(""" + SELECT table_name + FROM information_schema.tables + WHERE table_schema='public' AND table_type='BASE TABLE';""") for table in tables: table_name = table[0] - rows = db.run( - f"SELECT * FROM {table_name} WHERE last_updated >= {datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')};" - ) + rows = db.run(f"SELECT * FROM {identifier(table_name)} " + "WHERE last_updated >= :latest;", + latest={datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')}) ## Creating a temporary file path and writing the column name to it followed by each row of data if rows: @@ -149,7 +152,9 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): with open(csv_file_path, "w", newline='') as file: writer = csv.writer(file) #column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] - column_names = [col_name[0] for col_name in db.run(f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';")] + column_names = [col_name[0] for col_name in + db.run("""SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS + WHERE table_name = :table ;""", table=table_name)] writer.writerow(column_names) writer.writerows(rows) s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') -- cgit v1.2.3 From e153f2072eafca2c83a84e2c4210c46a40dabaf4 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Fri, 16 Aug 2024 14:36:15 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in 4428b8d according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/47 --- src/extract_lambda.py | 66 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index d1a5c7c..9a0e509 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -30,8 +30,8 @@ def lambda_handler(event, context): db = connect_to_database() existing_files = list_existing_s3_files() any_changes = process_and_upload_tables(db, existing_files) - - if not any_changes['updated']: + + if not any_changes["updated"]: logger.info("No changes detected in the database.") return { "statusCode": 200, @@ -39,9 +39,11 @@ def lambda_handler(event, context): } else: return { - 'statusCode': 200, - 'body': json.dumps(f"""CSV files processed for {', '.join(any_changes['updated'])} and uploaded successfully.{ - 'The following tables were not updated: '+', '.join(any_changes['no change']) if any_changes['no change'] else ''}""") + "statusCode": 200, + "body": json.dumps( + f"""CSV files processed for {', '.join(any_changes['updated'])} and uploaded successfully.{ + 'The following tables were not updated: '+', '.join(any_changes['no change']) if any_changes['no change'] else ''}""" + ), } except Exception as e: logger.error(f"Error: {e}") @@ -125,8 +127,8 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): in the existing_files dictionary with the same name. If it finds any changes to files, or new tables/files it uploads them to the s3 bucket """ - load_status = {'updated':[],'no change':[]} - ## Retrieving the latest file timestamp from S3 extract bucket + load_status = {"updated": [], "no change": []} + # Retrieving the latest file timestamp from S3 extract bucket all_datetimes = [] for file_names in existing_files.keys(): datetime_str_on_s3 = "".join( @@ -135,38 +137,50 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): all_datetimes.append(datetime.strptime(datetime_str_on_s3, "%Y/%m/%d/%H:%M:%S")) latest_timestamp = max(all_datetimes) - ## Iterating through tables on the database and retrieving only latest changes vs previous file load - tables = db.run(""" + # Iterating through tables on the database and retrieving only latest changes vs previous file load + tables = db.run( + """ SELECT table_name FROM information_schema.tables - WHERE table_schema='public' AND table_type='BASE TABLE';""") + WHERE table_schema='public' AND table_type='BASE TABLE';""" + ) for table in tables: table_name = table[0] - rows = db.run(f"SELECT * FROM {identifier(table_name)} " - "WHERE last_updated >= :latest;", - latest={datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')}) + rows = db.run( + f"SELECT * FROM {identifier(table_name)} " "WHERE last_updated >= :latest;", + latest={datetime.strftime(latest_timestamp, "%H-%m-%d %H:%M:%S")}, + ) - ## Creating a temporary file path and writing the column name to it followed by each row of data + # Creating a temporary file path and writing the column name to it followed by each row of data if rows: csv_file_path = f"/tmp/{table_name}.csv" - with open(csv_file_path, "w", newline='') as file: + with open(csv_file_path, "w", newline="") as file: writer = csv.writer(file) - #column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] - column_names = [col_name[0] for col_name in - db.run("""SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS - WHERE table_name = :table ;""", table=table_name)] + # column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] + column_names = [ + col_name[0] + for col_name in db.run( + """SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS + WHERE table_name = :table ;""", + table=table_name, + ) + ] writer.writerow(column_names) writer.writerows(rows) - s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') + s3_key = datetime.strftime( + datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv" + ) - ## Writing the new file to S3 extract bucket: + # Writing the new file to S3 extract bucket: try: client.upload_file(csv_file_path, extract_bucket(), s3_key) - load_status['updated'].append(table_name) + load_status["updated"].append(table_name) logger.info(f"Uploaded {s3_key} to S3.") except ClientError as e: - logger.error(f'Error uploading to S3: {e}') + logger.error(f"Error uploading to S3: {e}") else: - load_status['no change'].append(table_name) - logger.info(f"No new data in {table_name} name. Latest data retrieved is from {latest_timestamp}.") - return load_status + load_status["no change"].append(table_name) + logger.info( + f"No new data in {table_name} name. Latest data retrieved is from {latest_timestamp}." + ) + return load_status -- cgit v1.2.3 From 890ca0434ce5f7c9e7bdba1482a86cd63a4ef8f9 Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Fri, 16 Aug 2024 15:45:03 +0100 Subject: dummy comment to test checks --- src/extract_lambda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 9a0e509..30c7005 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -11,7 +11,7 @@ import re logger = logging.getLogger() logger.setLevel(logging.INFO) - +## DB Exception class class DBConnectionException(Exception): """Wraps pg8000.native Error or DatabaseError.""" -- cgit v1.2.3 From 653cb35e50b339356274ff03c0d75ac3babf927f Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Fri, 16 Aug 2024 14:45:16 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in 890ca04 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/47 --- src/extract_lambda.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 30c7005..4168e27 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -11,7 +11,9 @@ import re logger = logging.getLogger() logger.setLevel(logging.INFO) -## DB Exception class +# DB Exception class + + class DBConnectionException(Exception): """Wraps pg8000.native Error or DatabaseError.""" -- cgit v1.2.3 From 39a33cecb5e19f15bed4a099b02bdba56c80c073 Mon Sep 17 00:00:00 2001 From: HastarTara Date: Fri, 16 Aug 2024 16:05:03 +0100 Subject: infra[tf] update lambda extract doesnt work yet --- terraform/lambda.tf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 658b8c8..71ddd11 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -83,7 +83,7 @@ resource "aws_lambda_function" "load_lambda" { } locals { - layer_dir = "${path.module}/../python" + layer_dir = "${path.module}/.." requirements = "${path.module}/../requirements.txt" layer_zip = "${path.module}/../layer.zip" } @@ -94,9 +94,9 @@ resource "null_resource" "prepare_layer" { } provisioner "local-exec" { command = < Date: Fri, 16 Aug 2024 16:23:56 +0100 Subject: infra(tf): add version constraints for null and archive --- terraform/main.tf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/terraform/main.tf b/terraform/main.tf index 3b06701..310a251 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -4,6 +4,14 @@ terraform { source = "hashicorp/aws" version = "~>5.0" } + null = { + source = "hashicorp/null" + version = "~>3.2.2" + } + archive = { + source = "hashicorp/archive" + version = "~>2.5.0" + } } backend "s3" { bucket = "bentley-project-secrets" -- cgit v1.2.3 From 303725f83cf5551b3d165aa02ce81562de488a01 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 16:24:44 +0100 Subject: infra(tf): re-add code that creates layer zip --- terraform/lambda.tf | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 71ddd11..67fd6eb 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -83,9 +83,9 @@ resource "aws_lambda_function" "load_lambda" { } locals { - layer_dir = "${path.module}/.." - requirements = "${path.module}/../requirements.txt" - layer_zip = "${path.module}/../layer.zip" + layer_dir = "${path.module}/.." + requirements = "${path.module}/../requirements.txt" + layer_zip = "${path.module}/../layer.zip" } resource "null_resource" "prepare_layer" { @@ -96,23 +96,23 @@ resource "null_resource" "prepare_layer" { command = < Date: Fri, 16 Aug 2024 16:28:04 +0100 Subject: chore(tf): remove dummy username/password sorry hackers! --- terraform/rds.tf | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/terraform/rds.tf b/terraform/rds.tf index d1b4959..a013fb3 100644 --- a/terraform/rds.tf +++ b/terraform/rds.tf @@ -60,18 +60,8 @@ # allocated_storage = 5 # engine = "postgres" # engine_version = "14.10" -# username = "totes" -# password = "totes123" -# # username = "user credentials for the root user" # we could use .env here -# # password = "user password for the root user" # we could use .env here -# ### alternatively to providing username nad password we can specify: -# # resource "aws_kms_key" "example_key" { -# # description = "Example KMS Key" -# # } -# # within the resource: -# # manage_master_user_password = true -# # master_user_secret_kms_key_id = aws_kms_key.example.key_id -# # } +# username = "" +# password = "" # db_subnet_group_name = aws_db_subnet_group.Terrific-Totes-sub-gr.name # vpc_security_group_ids = [aws_security_group.rds.id] # parameter_group_name = aws_db_parameter_group.Terrific-Totes-param-gr.name -- cgit v1.2.3 From 1e27974ecc48d8611b87af1b9cd51e29afa8c792 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 17:15:59 +0100 Subject: test(fx): fix prepare_layer - broken --- terraform/lambda.tf | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 67fd6eb..27e6266 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -89,14 +89,13 @@ locals { } resource "null_resource" "prepare_layer" { - triggers = { - requirements_hash = filesha1(local.requirements) - } provisioner "local-exec" { command = < Date: Fri, 16 Aug 2024 21:06:51 +0100 Subject: docs: add badges to README --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 6bc75dc..cbb446c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,13 @@ # ToteSys - Data Engineering Project +[![Python](https://img.shields.io/badge/Python-FFD43B?style=for-the-badge&logo=python&logoColor=blue)](https://www.python.org/) +[![AWS](https://img.shields.io/badge/Amazon_AWS-FF9900?style=for-the-badge&logo=amazonaws&logoColor=white)](https://aws.amazon.com/) +[![Terraform](https://img.shields.io/badge/Terraform-7B42BC?style=for-the-badge&logo=terraform&logoColor=white)](https://www.terraform.io/) +[![Postgresql](https://img.shields.io/badge/PostgreSQL-316192?style=for-the-badge&logo=postgresql&logoColor=white)](https://www.postgresql.org/) +[![GitHub Actions](https://img.shields.io/badge/GitHub_Actions-2088FF?style=for-the-badge&logo=github-actions&logoColor=white)](https://github.com/features/actions) + +[![Terraform Main Deployment Workflow Status](https://img.shields.io/github/actions/workflow/status/ajschofield/de-project-bentley/deploy.yml?branch=main&style=flat-square&label=deploy)](https://github.com/ajschofield/de-project-bentley/actions/workflows/deploy.yml?query=branch%3Amain) +[![Production Environment Status](https://img.shields.io/github/deployments/ajschofield/de-project-bentley/production?style=flat-square&label=env)](https://github.com/ajschofield/de-project-bentley/deployments/production) # Summary The project aims to implement a data platform that can extract data from an operational database, archive it in a data lake, and make it easily accessible -- cgit v1.2.3 From 95ad71be4315f5ae3f9183f66049ae8b8cf914fc Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Fri, 16 Aug 2024 20:07:43 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in 9dabc89 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/52 --- src/load_lambda.py | 2 +- src/secrets_manager.py | 31 +++++++++---------- src/transform_lambda.py | 2 +- test/test_secrets_manager.py | 19 +++++++----- tests/test_extract_lambda.py | 69 ++++++++++++++++++++++++------------------- tests/test_secrets_manager.py | 37 +++++++++++++++-------- 6 files changed, 93 insertions(+), 67 deletions(-) diff --git a/src/load_lambda.py b/src/load_lambda.py index 6ee681f..c6a8e60 100644 --- a/src/load_lambda.py +++ b/src/load_lambda.py @@ -1,2 +1,2 @@ def lambda_handler(): - pass \ No newline at end of file + pass diff --git a/src/secrets_manager.py b/src/secrets_manager.py index c0fb61e..3484688 100644 --- a/src/secrets_manager.py +++ b/src/secrets_manager.py @@ -4,45 +4,46 @@ import json def sm_client(): - sm_client = boto3.client('secretsmanager') + sm_client = boto3.client("secretsmanager") yield sm_client -def create_secret(sm_client, secret_name, cohort_id, user, password, host, database, port): + +def create_secret( + sm_client, secret_name, cohort_id, user, password, host, database, port +): secret = { "cohort_id": cohort_id, "user": user, "password": password, "host": host, "database": database, - "port": port + "port": port, } response = sm_client.create_secret( - Name = secret_name, - SecretString = json.dumps(secret) + Name=secret_name, SecretString=json.dumps(secret) ) print(response) return response + def list_secret(sm_client): response = sm_client.list_secrets() - secret_dict = response['SecretList'] + secret_dict = response["SecretList"] secret_names = [] for items in secret_dict: - secret_names.append(items['Name']) - print(f'{len(secret_names)} secret(s) available') + secret_names.append(items["Name"]) + print(f"{len(secret_names)} secret(s) available") for name in secret_names: print(name) return secret_names -def retrieve_secrets(sm_client): - response = sm_client.get_secrets( - - ) +def retrieve_secrets(sm_client): + response = sm_client.get_secrets() -#retrieve secret -#so lambda can access totesy db -#so lambda connect to the db and then retrieve the data \ No newline at end of file +# retrieve secret +# so lambda can access totesy db +# so lambda connect to the db and then retrieve the data diff --git a/src/transform_lambda.py b/src/transform_lambda.py index 6ee681f..c6a8e60 100644 --- a/src/transform_lambda.py +++ b/src/transform_lambda.py @@ -1,2 +1,2 @@ def lambda_handler(): - pass \ No newline at end of file + pass diff --git a/test/test_secrets_manager.py b/test/test_secrets_manager.py index 86533bc..cb4ec15 100644 --- a/test/test_secrets_manager.py +++ b/test/test_secrets_manager.py @@ -2,10 +2,12 @@ from src.secrets_manager import sm_client, create_secret, list_secret import boto3 from moto import mock_aws import json -import pytest +import pytest import os -pytest.fixture(scope='class') +pytest.fixture(scope="class") + + def mock_aws_credentials(): """Mocked AWS Credentials for moto.""" os.environ["AWS_ACCESS_KEY_ID"] = "testing" @@ -14,10 +16,11 @@ def mock_aws_credentials(): os.environ["AWS_SESSION_TOKEN"] = "testing" os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" -@pytest.fixture(scope='class') + +@pytest.fixture(scope="class") def mock_sm_client(mock_aws_credentials): with mock_aws(): - yield boto3.client('secretsmanager') + yield boto3.client("secretsmanager") def test_create_secret_stores_secrets(mock_sm_client): @@ -29,6 +32,8 @@ def test_create_secret_stores_secrets(mock_sm_client): port = "test_port" secret_name = "test_secret" - response = create_secret(mock_sm_client, secret_name, cohort_id, user, password, host, database, port) - - assert response['Name'] == secret_name \ No newline at end of file + response = create_secret( + mock_sm_client, secret_name, cohort_id, user, password, host, database, port + ) + + assert response["Name"] == secret_name diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index e94a8a4..877e36a 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -3,11 +3,17 @@ import boto3 from moto import mock_aws from unittest.mock import patch, MagicMock from unittest import TestCase -from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables -import os +from src.extract_lambda import ( + list_existing_s3_files, + connect_to_database, + DBConnectionException, + process_and_upload_tables, +) +import os import logging -@pytest.fixture(scope='class') + +@pytest.fixture(scope="class") def mock_config(): env_vars = { "host": "abc", @@ -20,54 +26,55 @@ def mock_config(): yield mock_config -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def aws_credentials(): - os.environ["AWS_ACCESS_KEY_ID"] = 'testing' - os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing' - os.environ["AWS_SECURIT_TOKEN"] = 'testing' - os.environ["AWS_SESSION_TOKEN"] = 'testing' - os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2' + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURIT_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" + -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def s3_client(aws_credentials): with mock_aws(): - yield boto3.client('s3') + yield boto3.client("s3") + class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): - logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) list_existing_s3_files(client=s3_client) - assert 'Error listing S3 objects' in caplog.text + assert "Error listing S3 objects" in caplog.text def test_error_if_bucket_is_empty(self, s3_client, caplog): - - s3_client.create_bucket(Bucket='extract_bucket', - CreateBucketConfiguration={ - 'LocationConstraint': 'eu-west-2' - }) + s3_client.create_bucket( + Bucket="extract_bucket", + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) list_existing_s3_files(client=s3_client) - assert 'The bucket is empty' in caplog.text + assert "The bucket is empty" in caplog.text def test_error_retrieving_object(self, s3_client, caplog): - s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt') - list_existing_s3_files(bucket_name='test_bucket', client=s3_client) + s3_client.upload_file("tests/dummy.txt", "extract_bucket", "dummy.txt") + list_existing_s3_files(bucket_name="test_bucket", client=s3_client) - assert 'Error retrieving S3 object ' in caplog.text + assert "Error retrieving S3 object " in caplog.text def test_retrieves_file_content(self, s3_client, caplog): result = list_existing_s3_files(client=s3_client) - assert list(result.values()) == ['This is a test file.'] + assert list(result.values()) == ["This is a test file."] + class TestConnectToDatabase: def test_connect_to_database(mock_conn, mock_config): - with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: + with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: connect_to_database() mock_conn.assert_called_with( - host="abc", user="def", port="5432", password="password", database="db" + host="abc", user="def", port="5432", password="password", database="db" ) def test_database_error(self, mock_config): @@ -76,12 +83,14 @@ class TestConnectToDatabase: def test_logs_interface_error(self, caplog): logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) with pytest.raises(DBConnectionException): connect_to_database() - assert 'Interface error' in caplog.text -''' + assert "Interface error" in caplog.text + + +""" class TestProcessAndUploadTables: def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog): logger = logging.getLogger() @@ -106,4 +115,4 @@ class TestProcessAndUploadTables: s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key) process_and_upload_tables(mock_db(), existing_files, client=s3_client) assert 'No new data.' in caplog.text -''' \ No newline at end of file +""" diff --git a/tests/test_secrets_manager.py b/tests/test_secrets_manager.py index a30be86..609c572 100644 --- a/tests/test_secrets_manager.py +++ b/tests/test_secrets_manager.py @@ -3,10 +3,11 @@ import boto3 import botocore.exceptions from moto import mock_aws import json -import pytest +import pytest import os -@pytest.fixture(scope='function') + +@pytest.fixture(scope="function") def aws_credentials(): """Mocked AWS Credentials for moto.""" os.environ["AWS_ACCESS_KEY_ID"] = "testing" @@ -15,12 +16,14 @@ def aws_credentials(): os.environ["AWS_SESSION_TOKEN"] = "testing" os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" -@pytest.fixture(scope='function') + +@pytest.fixture(scope="function") def mock_sm_client(aws_credentials): with mock_aws(): yield boto3.client("secretsmanager") -@pytest.fixture(scope='function') + +@pytest.fixture(scope="function") def mock_store_secret(mock_sm_client): secret = { "cohort_id": "test_cohort_id", @@ -28,15 +31,18 @@ def mock_store_secret(mock_sm_client): "password": "test_password", "host": "test_host", "database": "test_database", - "port": "test_port" + "port": "test_port", } secret_name = "test_secret" - response = mock_sm_client.create_secret(Name=secret_name, SecretString=json.dumps(secret)) + response = mock_sm_client.create_secret( + Name=secret_name, SecretString=json.dumps(secret) + ) return response + def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret): secret_name = "test_secret" @@ -44,8 +50,10 @@ def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret) assert isinstance(result, dict) -def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_store_secret): +def test_retrieves_secrets_returns_correct_keys_and_values( + mock_sm_client, mock_store_secret +): secret_name = "test_secret" result = retrieve_secrets(mock_sm_client, secret_name) @@ -57,17 +65,20 @@ def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_ assert result["database"] == "test_database" assert result["port"] == "test_port" -def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type(mock_sm_client): - secret_name = [1, 2, 3] +def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type( + mock_sm_client, +): + secret_name = [1, 2, 3] with pytest.raises(botocore.exceptions.ParamValidationError) as error: retrieve_secrets(mock_sm_client, secret_name) -def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist(mock_sm_client, mock_store_secret): - secret_name = 'test_secret_2' - +def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist( + mock_sm_client, mock_store_secret +): + secret_name = "test_secret_2" with pytest.raises(botocore.exceptions.ClientError) as error: - retrieve_secrets(mock_sm_client, secret_name) \ No newline at end of file + retrieve_secrets(mock_sm_client, secret_name) -- cgit v1.2.3 From e27c6b48897a48f8462b8a0f40deb0ddaf301b63 Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Mon, 19 Aug 2024 11:21:58 +0100 Subject: layers block update, function resources to inlcude attributes: layers, correct handler and source_code_hash --- terraform/lambda.tf | 70 +++++++++++++++++++++++++++++------------------------ terraform/s3.tf | 5 ++++ 2 files changed, 44 insertions(+), 31 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 27e6266..e33bc79 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -12,12 +12,14 @@ resource "aws_s3_object" "extract_lambda_code" { } resource "aws_lambda_function" "extract_lambda" { - function_name = var.extract_lambda_name - s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = aws_s3_object.extract_lambda_code.key - role = aws_iam_role.multi_service_role.arn - handler = "extract_lambda.extract" - runtime = "python3.11" + function_name = var.extract_lambda_name + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.extract_lambda_code.key + layers = [aws_lambda_layer_version.lambda_layer.arn] + role = aws_iam_role.multi_service_role.arn + handler = "extract_lambda.lambda_handler" + runtime = "python3.11" + source_code_hash = data.archive_file.extract_lambda_zip.output_base64sha256 lifecycle { create_before_destroy = true @@ -40,12 +42,14 @@ resource "aws_s3_object" "transform_lambda_code" { } resource "aws_lambda_function" "transform_lambda" { - function_name = var.transform_lambda_name - s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = aws_s3_object.transform_lambda_code.key - role = aws_iam_role.multi_service_role.arn - handler = "transform_lambda.transform" - runtime = "python3.11" + function_name = var.transform_lambda_name + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.transform_lambda_code.key + layers = [aws_lambda_layer_version.lambda_layer.arn] + role = aws_iam_role.multi_service_role.arn + handler = "transform_lambda.lambda_handler" + runtime = "python3.11" + source_code_hash = data.archive_file.transform_lambda_zip.output_base64sha256 lifecycle { create_before_destroy = true @@ -68,12 +72,14 @@ resource "aws_s3_object" "load_lambda_code" { } resource "aws_lambda_function" "load_lambda" { - function_name = var.load_lambda_name - s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = aws_s3_object.load_lambda_code.key - role = aws_iam_role.multi_service_role.arn - handler = "load_lambda.load" - runtime = "python3.11" + function_name = var.load_lambda_name + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.load_lambda_code.key + layers = [aws_lambda_layer_version.lambda_layer.arn] + role = aws_iam_role.multi_service_role.arn + handler = "load_lambda.lambda_handler" + runtime = "python3.11" + source_code_hash = data.archive_file.load_lambda_zip.output_base64sha256 lifecycle { create_before_destroy = true @@ -82,10 +88,12 @@ resource "aws_lambda_function" "load_lambda" { depends_on = [aws_s3_object.load_lambda_code] } +# Lambda Layer Specification locals { - layer_dir = "${path.module}/.." - requirements = "${path.module}/../requirements.txt" - layer_zip = "${path.module}/../layer.zip" + layer_dir = "lambda_layer" + requirements = "requirements.txt" + layer_zip = "layer.zip" + layer_name = "lambda_layer_dev" } resource "null_resource" "prepare_layer" { @@ -95,23 +103,23 @@ resource "null_resource" "prepare_layer" { rm -rf python mkdir python pip3 install -r ${local.requirements} -t python/ - zip -r ${local.layer_zip} python/ - EOT - } + zip -r ${local.layer_zip} python + EOT + } #removed / at the end of python in line 99 } -resource "aws_s3_object" "layer_zip" { - bucket = aws_s3_bucket.lambda_code_bucket.bucket - key = "layer.zip" +resource "aws_s3_object" "lambda_layer_zip" { + bucket = aws_s3_bucket.lambda_code_bucket.id #bucket instead of id + key = "lambda_layer/${local.layer_name}/${local.layer_zip}" source = "${local.layer_dir}/${local.layer_zip}" depends_on = [null_resource.prepare_layer] } resource "aws_lambda_layer_version" "lambda_layer" { - layer_name = "lambda_layer" + layer_name = local.layer_name compatible_runtimes = ["python3.11"] - s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = aws_s3_object.layer_zip.key + s3_bucket = aws_s3_bucket.lambda_layer_bucket.id #bucket instead of id + s3_key = aws_s3_object.lambda_layer_zip.key skip_destroy = true - depends_on = [aws_s3_object.layer_zip] + depends_on = [aws_s3_object.lambda_layer_zip] } diff --git a/terraform/s3.tf b/terraform/s3.tf index d5cdee3..b3a863c 100644 --- a/terraform/s3.tf +++ b/terraform/s3.tf @@ -12,3 +12,8 @@ resource "aws_s3_bucket" "transform_bucket" { resource "aws_s3_bucket" "lambda_code_bucket" { bucket_prefix = "${var.s3_code_bucket_name}-" } + +### LAMBDA LAYER BUCKET +resource "aws_s3_bucket" "lambda_layer_bucket" { + bucket_prefix = "lambda-layer-dev-" +} \ No newline at end of file -- cgit v1.2.3 From 43df5dd9c6bd21f33a7fccbc9b81ad3677637da5 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 10:23:19 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in e27c6b4 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/55 --- src/load_lambda.py | 2 +- src/secrets_manager.py | 31 +++++++++---------- src/transform_lambda.py | 2 +- test/test_secrets_manager.py | 19 +++++++----- tests/test_extract_lambda.py | 69 ++++++++++++++++++++++++------------------- tests/test_secrets_manager.py | 37 +++++++++++++++-------- 6 files changed, 93 insertions(+), 67 deletions(-) diff --git a/src/load_lambda.py b/src/load_lambda.py index 6ee681f..c6a8e60 100644 --- a/src/load_lambda.py +++ b/src/load_lambda.py @@ -1,2 +1,2 @@ def lambda_handler(): - pass \ No newline at end of file + pass diff --git a/src/secrets_manager.py b/src/secrets_manager.py index c0fb61e..3484688 100644 --- a/src/secrets_manager.py +++ b/src/secrets_manager.py @@ -4,45 +4,46 @@ import json def sm_client(): - sm_client = boto3.client('secretsmanager') + sm_client = boto3.client("secretsmanager") yield sm_client -def create_secret(sm_client, secret_name, cohort_id, user, password, host, database, port): + +def create_secret( + sm_client, secret_name, cohort_id, user, password, host, database, port +): secret = { "cohort_id": cohort_id, "user": user, "password": password, "host": host, "database": database, - "port": port + "port": port, } response = sm_client.create_secret( - Name = secret_name, - SecretString = json.dumps(secret) + Name=secret_name, SecretString=json.dumps(secret) ) print(response) return response + def list_secret(sm_client): response = sm_client.list_secrets() - secret_dict = response['SecretList'] + secret_dict = response["SecretList"] secret_names = [] for items in secret_dict: - secret_names.append(items['Name']) - print(f'{len(secret_names)} secret(s) available') + secret_names.append(items["Name"]) + print(f"{len(secret_names)} secret(s) available") for name in secret_names: print(name) return secret_names -def retrieve_secrets(sm_client): - response = sm_client.get_secrets( - - ) +def retrieve_secrets(sm_client): + response = sm_client.get_secrets() -#retrieve secret -#so lambda can access totesy db -#so lambda connect to the db and then retrieve the data \ No newline at end of file +# retrieve secret +# so lambda can access totesy db +# so lambda connect to the db and then retrieve the data diff --git a/src/transform_lambda.py b/src/transform_lambda.py index 6ee681f..c6a8e60 100644 --- a/src/transform_lambda.py +++ b/src/transform_lambda.py @@ -1,2 +1,2 @@ def lambda_handler(): - pass \ No newline at end of file + pass diff --git a/test/test_secrets_manager.py b/test/test_secrets_manager.py index 86533bc..cb4ec15 100644 --- a/test/test_secrets_manager.py +++ b/test/test_secrets_manager.py @@ -2,10 +2,12 @@ from src.secrets_manager import sm_client, create_secret, list_secret import boto3 from moto import mock_aws import json -import pytest +import pytest import os -pytest.fixture(scope='class') +pytest.fixture(scope="class") + + def mock_aws_credentials(): """Mocked AWS Credentials for moto.""" os.environ["AWS_ACCESS_KEY_ID"] = "testing" @@ -14,10 +16,11 @@ def mock_aws_credentials(): os.environ["AWS_SESSION_TOKEN"] = "testing" os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" -@pytest.fixture(scope='class') + +@pytest.fixture(scope="class") def mock_sm_client(mock_aws_credentials): with mock_aws(): - yield boto3.client('secretsmanager') + yield boto3.client("secretsmanager") def test_create_secret_stores_secrets(mock_sm_client): @@ -29,6 +32,8 @@ def test_create_secret_stores_secrets(mock_sm_client): port = "test_port" secret_name = "test_secret" - response = create_secret(mock_sm_client, secret_name, cohort_id, user, password, host, database, port) - - assert response['Name'] == secret_name \ No newline at end of file + response = create_secret( + mock_sm_client, secret_name, cohort_id, user, password, host, database, port + ) + + assert response["Name"] == secret_name diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index e94a8a4..877e36a 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -3,11 +3,17 @@ import boto3 from moto import mock_aws from unittest.mock import patch, MagicMock from unittest import TestCase -from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables -import os +from src.extract_lambda import ( + list_existing_s3_files, + connect_to_database, + DBConnectionException, + process_and_upload_tables, +) +import os import logging -@pytest.fixture(scope='class') + +@pytest.fixture(scope="class") def mock_config(): env_vars = { "host": "abc", @@ -20,54 +26,55 @@ def mock_config(): yield mock_config -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def aws_credentials(): - os.environ["AWS_ACCESS_KEY_ID"] = 'testing' - os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing' - os.environ["AWS_SECURIT_TOKEN"] = 'testing' - os.environ["AWS_SESSION_TOKEN"] = 'testing' - os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2' + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURIT_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" + -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def s3_client(aws_credentials): with mock_aws(): - yield boto3.client('s3') + yield boto3.client("s3") + class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): - logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) list_existing_s3_files(client=s3_client) - assert 'Error listing S3 objects' in caplog.text + assert "Error listing S3 objects" in caplog.text def test_error_if_bucket_is_empty(self, s3_client, caplog): - - s3_client.create_bucket(Bucket='extract_bucket', - CreateBucketConfiguration={ - 'LocationConstraint': 'eu-west-2' - }) + s3_client.create_bucket( + Bucket="extract_bucket", + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) list_existing_s3_files(client=s3_client) - assert 'The bucket is empty' in caplog.text + assert "The bucket is empty" in caplog.text def test_error_retrieving_object(self, s3_client, caplog): - s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt') - list_existing_s3_files(bucket_name='test_bucket', client=s3_client) + s3_client.upload_file("tests/dummy.txt", "extract_bucket", "dummy.txt") + list_existing_s3_files(bucket_name="test_bucket", client=s3_client) - assert 'Error retrieving S3 object ' in caplog.text + assert "Error retrieving S3 object " in caplog.text def test_retrieves_file_content(self, s3_client, caplog): result = list_existing_s3_files(client=s3_client) - assert list(result.values()) == ['This is a test file.'] + assert list(result.values()) == ["This is a test file."] + class TestConnectToDatabase: def test_connect_to_database(mock_conn, mock_config): - with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: + with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: connect_to_database() mock_conn.assert_called_with( - host="abc", user="def", port="5432", password="password", database="db" + host="abc", user="def", port="5432", password="password", database="db" ) def test_database_error(self, mock_config): @@ -76,12 +83,14 @@ class TestConnectToDatabase: def test_logs_interface_error(self, caplog): logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) with pytest.raises(DBConnectionException): connect_to_database() - assert 'Interface error' in caplog.text -''' + assert "Interface error" in caplog.text + + +""" class TestProcessAndUploadTables: def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog): logger = logging.getLogger() @@ -106,4 +115,4 @@ class TestProcessAndUploadTables: s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key) process_and_upload_tables(mock_db(), existing_files, client=s3_client) assert 'No new data.' in caplog.text -''' \ No newline at end of file +""" diff --git a/tests/test_secrets_manager.py b/tests/test_secrets_manager.py index a30be86..609c572 100644 --- a/tests/test_secrets_manager.py +++ b/tests/test_secrets_manager.py @@ -3,10 +3,11 @@ import boto3 import botocore.exceptions from moto import mock_aws import json -import pytest +import pytest import os -@pytest.fixture(scope='function') + +@pytest.fixture(scope="function") def aws_credentials(): """Mocked AWS Credentials for moto.""" os.environ["AWS_ACCESS_KEY_ID"] = "testing" @@ -15,12 +16,14 @@ def aws_credentials(): os.environ["AWS_SESSION_TOKEN"] = "testing" os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" -@pytest.fixture(scope='function') + +@pytest.fixture(scope="function") def mock_sm_client(aws_credentials): with mock_aws(): yield boto3.client("secretsmanager") -@pytest.fixture(scope='function') + +@pytest.fixture(scope="function") def mock_store_secret(mock_sm_client): secret = { "cohort_id": "test_cohort_id", @@ -28,15 +31,18 @@ def mock_store_secret(mock_sm_client): "password": "test_password", "host": "test_host", "database": "test_database", - "port": "test_port" + "port": "test_port", } secret_name = "test_secret" - response = mock_sm_client.create_secret(Name=secret_name, SecretString=json.dumps(secret)) + response = mock_sm_client.create_secret( + Name=secret_name, SecretString=json.dumps(secret) + ) return response + def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret): secret_name = "test_secret" @@ -44,8 +50,10 @@ def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret) assert isinstance(result, dict) -def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_store_secret): +def test_retrieves_secrets_returns_correct_keys_and_values( + mock_sm_client, mock_store_secret +): secret_name = "test_secret" result = retrieve_secrets(mock_sm_client, secret_name) @@ -57,17 +65,20 @@ def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_ assert result["database"] == "test_database" assert result["port"] == "test_port" -def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type(mock_sm_client): - secret_name = [1, 2, 3] +def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type( + mock_sm_client, +): + secret_name = [1, 2, 3] with pytest.raises(botocore.exceptions.ParamValidationError) as error: retrieve_secrets(mock_sm_client, secret_name) -def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist(mock_sm_client, mock_store_secret): - secret_name = 'test_secret_2' - +def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist( + mock_sm_client, mock_store_secret +): + secret_name = "test_secret_2" with pytest.raises(botocore.exceptions.ClientError) as error: - retrieve_secrets(mock_sm_client, secret_name) \ No newline at end of file + retrieve_secrets(mock_sm_client, secret_name) -- cgit v1.2.3 From 1ea59ed0d92d5bbbd1ffe46ca7a1e296aa55fb1f Mon Sep 17 00:00:00 2001 From: T-Aji Date: Mon, 19 Aug 2024 11:29:45 +0100 Subject: all tests added --- tests/test_extract_lambda.py | 155 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 125 insertions(+), 30 deletions(-) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index e94a8a4..67cb6d3 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -3,11 +3,19 @@ import boto3 from moto import mock_aws from unittest.mock import patch, MagicMock from unittest import TestCase -from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables -import os +from src.extract_lambda import ( + list_existing_s3_files, + connect_to_database, + DBConnectionException, + lambda_handler, + process_and_upload_tables, +) +import os import logging +import json -@pytest.fixture(scope='class') + +@pytest.fixture(scope="class") def mock_config(): env_vars = { "host": "abc", @@ -20,54 +28,139 @@ def mock_config(): yield mock_config -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def aws_credentials(): - os.environ["AWS_ACCESS_KEY_ID"] = 'testing' - os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing' - os.environ["AWS_SECURIT_TOKEN"] = 'testing' - os.environ["AWS_SESSION_TOKEN"] = 'testing' - os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2' + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURIT_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" + -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def s3_client(aws_credentials): with mock_aws(): - yield boto3.client('s3') + yield boto3.client("s3") + + +class TestLambdaHandler: + def test_lambda_handler_files_processed_and_uploaded_successfully(self, mocker): + mock_db = MagicMock() + mock_db.run.side_effect = [ + [["Fruits"]], + [["Vegetable", "Sour", "Green"], ["Berry", "Sweet", "Red"]], + [["Food_type"], ["Flavour"], ["Colour"]], + ] + mock_db.columns.return_value = [ + {"name": "Food_type"}, + {"name": "Flavour"}, + {"name": "Colour"}, + ] + with patch("src.extract_lambda.connect_to_database", return_value=mock_db): + mock_process_and_upload_tables = mocker.patch( + "src.extract_lambda.process_and_upload_tables", return_value=mock_db + ) + mock_list_existing_s3_files = mocker.patch( + "src.extract_lambda.list_existing_s3_files", return_value={} + ) + event = {} + context = {} + response = lambda_handler(event, context) + assert response["statusCode"] == 200 + assert ( + json.loads(response["body"]) + == "CSV files processed and uploaded successfully." + ) + mock_list_existing_s3_files.assert_called_once() + mock_process_and_upload_tables.assert_called_once_with(mock_db, {}) + mock_db.close.assert_called_once() + + def test_lambda_handler_no_changes_detected_no_files_uploaded(self, mocker): + mock_db = MagicMock() + mock_db.run.side_effect = [ + [["Fruits"]], + [["Vegetable", "Sour", "Green"], ["Berry", "Sweet", "Red"]], + [["Food_type"], ["Flavour"], ["Colour"]], + ] + mock_db.columns.return_value = [ + {"name": "Food_type"}, + {"name": "Flavour"}, + {"name": "Colour"}, + ] + + with patch("src.extract_lambda.connect_to_database", return_value=mock_db): + mock_process_and_upload_tables = mocker.patch( + "src.extract_lambda.process_and_upload_tables", return_value=False + ) + mock_list_existing_s3_files = mocker.patch( + "src.extract_lambda.list_existing_s3_files", return_value={} + ) + event = {} + context = {} + response = lambda_handler(event, context) + assert response["statusCode"] == 200 + assert ( + json.loads(response["body"]) + == "No changes detected, no CSV files were uploaded." + ) + mock_list_existing_s3_files.assert_called_once() + mock_process_and_upload_tables.assert_called_once_with(mock_db, {}) + mock_db.close.assert_called_once() + + def test_lambda_handler_exception_error(self, mocker): + with patch( + "src.extract_lambda.connect_to_database", + side_effect=Exception("Database connection error"), + ): + mock_process_and_upload_tables = mocker.patch( + "src.extract_lambda.process_and_upload_tables" + ) + mock_list_existing_s3_files = mocker.patch( + "src.extract_lambda.list_existing_s3_files" + ) + event = {} + context = {} + response = lambda_handler(event, context) + assert response["statusCode"] == 500 + assert json.loads(response["body"]) == "Internal server error." + mock_list_existing_s3_files.assert_not_called() + mock_process_and_upload_tables.assert_not_called() + class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): - logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) list_existing_s3_files(client=s3_client) - assert 'Error listing S3 objects' in caplog.text + assert "Error listing S3 objects" in caplog.text def test_error_if_bucket_is_empty(self, s3_client, caplog): - - s3_client.create_bucket(Bucket='extract_bucket', - CreateBucketConfiguration={ - 'LocationConstraint': 'eu-west-2' - }) + s3_client.create_bucket( + Bucket="extract_bucket", + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) list_existing_s3_files(client=s3_client) - assert 'The bucket is empty' in caplog.text + assert "The bucket is empty" in caplog.text def test_error_retrieving_object(self, s3_client, caplog): - s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt') - list_existing_s3_files(bucket_name='test_bucket', client=s3_client) + s3_client.upload_file("tests/dummy.txt", "extract_bucket", "dummy.txt") + list_existing_s3_files(bucket_name="test_bucket", client=s3_client) - assert 'Error retrieving S3 object ' in caplog.text + assert "Error retrieving S3 object " in caplog.text def test_retrieves_file_content(self, s3_client, caplog): result = list_existing_s3_files(client=s3_client) - assert list(result.values()) == ['This is a test file.'] + assert list(result.values()) == ["This is a test file."] + class TestConnectToDatabase: def test_connect_to_database(mock_conn, mock_config): - with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: + with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: connect_to_database() mock_conn.assert_called_with( - host="abc", user="def", port="5432", password="password", database="db" + host="abc", user="def", port="5432", password="password", database="db" ) def test_database_error(self, mock_config): @@ -76,12 +169,14 @@ class TestConnectToDatabase: def test_logs_interface_error(self, caplog): logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) with pytest.raises(DBConnectionException): connect_to_database() - assert 'Interface error' in caplog.text -''' + assert "Interface error" in caplog.text + + +""" class TestProcessAndUploadTables: def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog): logger = logging.getLogger() @@ -106,4 +201,4 @@ class TestProcessAndUploadTables: s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key) process_and_upload_tables(mock_db(), existing_files, client=s3_client) assert 'No new data.' in caplog.text -''' \ No newline at end of file +""" -- cgit v1.2.3 From 24a4573d6cf64ec0383ae16bfba09a0ffdb8c129 Mon Sep 17 00:00:00 2001 From: T-Aji Date: Mon, 19 Aug 2024 11:49:08 +0100 Subject: update .gitignore --- .gitignore | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index bceab93..6aa03fc 100644 --- a/.gitignore +++ b/.gitignore @@ -14,5 +14,4 @@ __pycache__/ # OS-Related Files .DS_Store - -*venv* +venv \ No newline at end of file -- cgit v1.2.3 From 444bb270fc8f758f33b0477c992b6a8e873bcd89 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 11:06:02 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in 0eff70f according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/59 --- tests/test_extract_lambda.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index fc68a4a..7707cbf 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -42,6 +42,7 @@ def s3_client(aws_credentials): with mock_aws(): yield boto3.client("s3") + class TestLambdaHandler: def test_lambda_handler_files_processed_and_uploaded_successfully(self, mocker): mock_db = MagicMock() @@ -125,6 +126,7 @@ class TestLambdaHandler: mock_list_existing_s3_files.assert_not_called() mock_process_and_upload_tables.assert_not_called() + class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): logger = logging.getLogger() -- cgit v1.2.3