From 610261fec06ab3b6106465960d6935dd9df85df0 Mon Sep 17 00:00:00 2001
From: Ang Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local>
Date: Fri, 16 Aug 2024 09:46:53 +0100
Subject: Secrets manager integration into the extract lambda reviewed.

---
 src/extract_lambda.py         | 29 +++++++++--------
 tests/test_secrets_manager.py | 73 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+), 13 deletions(-)
 create mode 100644 tests/test_secrets_manager.py

diff --git a/src/extract_lambda.py b/src/extract_lambda.py
index fb2d7e8..3055f63 100644
--- a/src/extract_lambda.py
+++ b/src/extract_lambda.py
@@ -1,5 +1,4 @@
-from pg8000.native import Connection, DatabaseError, InterfaceError
-from dotenv import dotenv_values
+from pg8000.native import Connection, InterfaceError
 import boto3
 import csv
 from botocore.exceptions import ClientError
@@ -42,31 +41,35 @@ def lambda_handler(event, context):
                 'statusCode': 200,
                 'body': json.dumps('CSV files processed and uploaded successfully.')
             }
-        
     except Exception as e:
         logger.error(f'Error: {e}')
         return {
             'statusCode': 500,
             'body': json.dumps('Internal server error.')
         }
-    
     finally:
- 
         if db:
             db.close()
 
-def get_config(path: str = ".env") -> dict:
-    return dotenv_values(path)
 
+def retrieve_secrets(sm_client=boto3.client('secretsmanager'), secret_name='bentley-secrets'):
+    try:
+        response = sm_client.get_secret_value(SecretId=secret_name)
+        if 'SecretString' in response:
+            secret = json.loads(response['SecretString'])
+            return secret
+    except ClientError as e:
+        logger.error(f'Could not retrieve secrets: {e}')
+        raise e
 
 def connect_to_database() -> Connection:
     try:
-        config = get_config()
-        host = config["host"]
-        port = config["port"]
-        user = config["user"]
-        password = config["password"]
-        database = config["database"]
+        secrets = retrieve_secrets()
+        host = secrets["host"]
+        port = secrets["port"]
+        user = secrets["user"]
+        password = secrets["password"]
+        database = secrets["database"]
 
         return Connection(
             database=database,
diff --git a/tests/test_secrets_manager.py b/tests/test_secrets_manager.py
new file mode 100644
index 0000000..a30be86
--- /dev/null
+++ b/tests/test_secrets_manager.py
@@ -0,0 +1,73 @@
+from src.secrets_manager import sm_client, retrieve_secrets
+import boto3
+import botocore.exceptions
+from moto import mock_aws
+import json
+import pytest 
+import os
+
+@pytest.fixture(scope='function')
+def aws_credentials():
+    """Mocked AWS Credentials for moto."""
+    os.environ["AWS_ACCESS_KEY_ID"] = "testing"
+    os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
+    os.environ["AWS_SECURITY_TOKEN"] = "testing"
+    os.environ["AWS_SESSION_TOKEN"] = "testing"
+    os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
+
+@pytest.fixture(scope='function')
+def mock_sm_client(aws_credentials):
+    with mock_aws():
+        yield boto3.client("secretsmanager")
+
+@pytest.fixture(scope='function')
+def mock_store_secret(mock_sm_client):
+    secret = {
+        "cohort_id": "test_cohort_id",
+        "user": "test_user_id",
+        "password": "test_password",
+        "host": "test_host",
+        "database": "test_database",
+        "port": "test_port"
+    }
+
+    secret_name = "test_secret"
+
+    response = mock_sm_client.create_secret(Name=secret_name, SecretString=json.dumps(secret))
+
+    return response
+
+def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret):
+    secret_name = "test_secret"
+
+    result = retrieve_secrets(mock_sm_client, secret_name)
+
+    assert isinstance(result, dict)
+
+def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_store_secret):
+
+    secret_name = "test_secret"
+
+    result = retrieve_secrets(mock_sm_client, secret_name)
+
+    assert result["cohort_id"] == "test_cohort_id"
+    assert result["user"] == "test_user_id"
+    assert result["password"] == "test_password"
+    assert result["host"] == "test_host"
+    assert result["database"] == "test_database"
+    assert result["port"] == "test_port"
+
+def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type(mock_sm_client):
+    secret_name = [1, 2, 3]
+
+
+    with pytest.raises(botocore.exceptions.ParamValidationError) as error:
+        retrieve_secrets(mock_sm_client, secret_name)
+
+
+def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist(mock_sm_client, mock_store_secret):
+    secret_name = 'test_secret_2'
+
+
+    with pytest.raises(botocore.exceptions.ClientError) as error:
+        retrieve_secrets(mock_sm_client, secret_name)
\ No newline at end of file
-- 
cgit v1.2.3


From 938ddda10ff2f7d5360ca0a939fa2f16d6beb09d Mon Sep 17 00:00:00 2001
From: Ang Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local>
Date: Fri, 16 Aug 2024 10:01:06 +0100
Subject: extract bucket name retrieval helper function and replace the bucket
 name placeholders

---
 src/extract_lambda.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/extract_lambda.py b/src/extract_lambda.py
index 3055f63..f4c0c1d 100644
--- a/src/extract_lambda.py
+++ b/src/extract_lambda.py
@@ -82,9 +82,12 @@ def connect_to_database() -> Connection:
         logger.error(f'Interface error: {i}')
         raise DBConnectionException("Failed to connect to database")
 
+def extract_bucket(client=boto3.client('s3')):
+    response = client.list_buckets()
+    extract_bucket_filter = [bucket['Name'] for bucket in response['Buckets'] if 'extract' in bucket['Name']]
+    return extract_bucket_filter[0]
 
-
-def list_existing_s3_files(bucket_name='extract_bucket', client=boto3.client('s3')):
+def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client('s3')):
     """Creates a dictionary and populates it with the 
        results of listing the contents of the s3 bucket, then
        returns the populated dictionary
@@ -93,7 +96,7 @@ def list_existing_s3_files(bucket_name='extract_bucket', client=boto3.client('s3
     existing_files = {}
     
     try:
-        response = client.list_objects_v2(Bucket='extract_bucket')
+        response = client.list_objects_v2(Bucket=bucket_name)
         
         if 'Contents' in response:
             for obj in response['Contents']:
@@ -150,7 +153,7 @@ def process_and_upload_tables(db, existing_files, client=boto3.client('s3')):
         ## END OF NEW CODE
         if existing_files[latest_s3_object_key] != new_csv_content: 
             try:
-                client.upload_file(csv_file_path, 'extract_bucket', s3_key)
+                client.upload_file(csv_file_path, extract_bucket(), s3_key)
                 logger.info(f"Uploaded {s3_key} to S3.")
             except ClientError as e:
                 logger.error(f'Error uploading to S3: {e}')
-- 
cgit v1.2.3


From c937a7e098d818dadbc769b3c9eb9fd93cc05af2 Mon Sep 17 00:00:00 2001
From: Alex Schofield <git@ajschof.me>
Date: Fri, 16 Aug 2024 10:01:28 +0100
Subject: docs: rm DEVNOTES.md

basically redundant now
---
 DEVNOTES.md | 100 ------------------------------------------------------------
 1 file changed, 100 deletions(-)
 delete mode 100644 DEVNOTES.md

diff --git a/DEVNOTES.md b/DEVNOTES.md
deleted file mode 100644
index 00b4ddd..0000000
--- a/DEVNOTES.md
+++ /dev/null
@@ -1,100 +0,0 @@
-# Workflow
-
-## References
-
-https://nvie.com/posts/a-successful-git-branching-model/ \
-https://learn.microsoft.com/en-us/azure/devops/repos/git/merging-with-squash?view=azure-devops
-
-
-## Branching
-
-*Based off GitFlow but slightly modified*
-
-- There are two main branches
-  - `main` - production-ready code
-  - `development` - integration branch for features
-  - `staging` - represents the current staging state
-- In addition, there are additional branches 
-  - Feature branches - for new features and non-urgent bugfixes
-  - Hotfix branches - probably won't be used but for critical bugs in production (this is what testing should prevent)
-  - Release branches - for preparation of production releases
-
-- Feature branches - e.g. `feature/short-description`
-- Bugfix branches - e.g. `bugfix/short-description`
-- Hotfix branches - e.g. `hotfix/short-description`
-- Release branches - e.g. `release/vX.Y.Z`
-
-### Examples
-```
-feature/add-data-extractor
-bugfix/fix-s3-upload-error
-hotfix/security-patch
-release/v1.0.0
-```
-
-## Environments
-
-1. Development - where active development and initial testing occur
-2. Staging - for integration testing and final checks before production
-3. Production - live and stable environment
-
-## Deployment
-
-1. `main` - represents the current production state
-2. `develop` - represents the integration branch for features and non-urgent fixes
-3. `staging` - represents the current staging state
-
-## Staging Flow
-
-1. Create feature branches from `develop` & merge completed features back into `develop`
-2. When the `develop` branch is ready for testing, create a `staging` branch from `develop`
-3. Deploy the `staging` branch to the staging environment and perform our unit-tests
-4. If staging tests pass, create a `release/vX.Y.Z` branch from `staging`
-5. Make any final adjustments in the `release/vX.Y.Z` branch
-6. Once we have approved the changes in the `release/vX.Y.Z` branch, merge into `main`
-7. Tag the release in `main`
-
-### Notes
-
-- No new features should be included in the release branches and any new features should be merged into `develop` for the next release cycle
-
-## Commit Messages
-
-Please follow the conventional commits specification:
-  
-```
-<type>[optional scope]: <description>
-
-<optional body>
-
-[optional footer(s)]
-```
-
-### Types
-- feat: new features
-- fix: bugfixes
-- docs: documentation-only changes
-- style: changes that do not affect the meaning of the code
-- refactor: code changes that neither fix bugs nor adds features
-- perf: code changes that improve performance
-- test: adding tests or correcting existing tests
-- chore: changes to build process or tools/libraries (probably not needed)
-- infra: changes to infrastructure configuration (e.g. Terraform)
-
-### Examples
-```
-feat(extract): add automatic scheduling for data ingestion
-docs: update README with project setup instructions
-```
-
-Configuration files for things such as Terraform isn't native to Conventional Commits, but we can add our own:
-
-```
-infra(tf): update S3 bucket policy
-```
-
-If the Terraform change involves a fix, you may combine `fix` and `infra`:
-
-```
-fix(infra): ...
-```
-- 
cgit v1.2.3


From 861fd5fe8303c6558c7763477c89dc98fff23c57 Mon Sep 17 00:00:00 2001
From: lian-manonog <lian.manonog@gmail.com>
Date: Fri, 16 Aug 2024 10:20:14 +0100
Subject: wip: pushing the ci-cd-branch to test terraform infra

---
 .github/workflows/deploy.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 372d0b3..922daee 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -3,7 +3,7 @@ name: deploy-terraform
 on:
   push:
     branches:
-      - test-ci/**  # Adjust the branch based on our deployment strategy
+      - ci-cd-branch # Adjust the branch based on our deployment strategy
 
 jobs:
   deploy-terraform:
-- 
cgit v1.2.3


From d25f05ba140cb85847ca604bef0e68b76a17ba62 Mon Sep 17 00:00:00 2001
From: Alex Schofield <git@ajschof.me>
Date: Fri, 16 Aug 2024 10:34:50 +0100
Subject: docs: add draft summary section

---
 README.md | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8ae0cb3..203482e 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,14 @@
-# de-project-bentley
\ No newline at end of file
+# ToteSys - Data Engineering Project
+
+# Summary
+The project aims to implement a data platform that can extract data from an
+operational database, archive it in a data lake, and make it easily accessible
+within a remodelled OLAP data warehouse.
+
+The solution showcases our skills in:
+
+- Python
+- PostgreSQL
+- Database modelling
+- Amazon Web Services (AWS)
+- Agile methodologies
\ No newline at end of file
-- 
cgit v1.2.3


From 9809e7ca1351d7b27f62b3c7c74db7124cab5dc9 Mon Sep 17 00:00:00 2001
From: Alex Schofield <git@ajschof.me>
Date: Fri, 16 Aug 2024 10:40:00 +0100
Subject: docs: add draft main objective section

---
 README.md | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 203482e..e55cb16 100644
--- a/README.md
+++ b/README.md
@@ -11,4 +11,14 @@ The solution showcases our skills in:
 - PostgreSQL
 - Database modelling
 - Amazon Web Services (AWS)
-- Agile methodologies
\ No newline at end of file
+- Agile methodologies
+
+# Main Objective
+
+Our goal is to create a reliable ETL (Extract, Transform, Load) pipeline that
+can:
+
+1. Extract the data from the `totesys` operational database
+2. Store the data in AWS S3 buckets, that will form our data lake
+3. Transform the data into a suitable schema for the data warehouse
+4. Load the data into the data warehouse hosted on AWS
\ No newline at end of file
-- 
cgit v1.2.3


From 37eb3bb7974904614867c7d0c2d4f6eccb39f22e Mon Sep 17 00:00:00 2001
From: Alex Schofield <git@ajschof.me>
Date: Fri, 16 Aug 2024 10:41:01 +0100
Subject: docs(main_obj): clarify data being loaded into data warehouse

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index e55cb16..9c7baee 100644
--- a/README.md
+++ b/README.md
@@ -21,4 +21,4 @@ can:
 1. Extract the data from the `totesys` operational database
 2. Store the data in AWS S3 buckets, that will form our data lake
 3. Transform the data into a suitable schema for the data warehouse
-4. Load the data into the data warehouse hosted on AWS
\ No newline at end of file
+4. Load the transformed data into the data warehouse hosted on AWS
\ No newline at end of file
-- 
cgit v1.2.3


From 0c42e8f165e0f98a6c16252e841432922467ef94 Mon Sep 17 00:00:00 2001
From: Ellie <ecsymonds@gmail.com>
Date: Fri, 16 Aug 2024 10:51:16 +0100
Subject: add lambda layer

---
 terraform/lambda.tf | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/terraform/lambda.tf b/terraform/lambda.tf
index 72d1306..658b8c8 100644
--- a/terraform/lambda.tf
+++ b/terraform/lambda.tf
@@ -81,3 +81,38 @@ resource "aws_lambda_function" "load_lambda" {
 
   depends_on = [aws_s3_object.load_lambda_code]
 }
+
+locals {
+  layer_dir       = "${path.module}/../python"
+  requirements    = "${path.module}/../requirements.txt"
+  layer_zip       = "${path.module}/../layer.zip"
+}
+
+resource "null_resource" "prepare_layer" {
+  triggers = {
+    requirements_hash = filesha1(local.requirements)
+  }
+  provisioner "local-exec" {
+    command = <<EOT
+      mkdir -p ${local.layer_dir}/lib/python3.8/site-packages/
+      pip install -r ${local.requirements} -t ${local.layer_dir}/lib/python3.11/site-packages/
+      cd ${local.layer_dir} && zip -r ${local.layer_zip} .
+    EOT
+}
+  }
+
+resource "aws_s3_object" "layer_zip" {
+  bucket = aws_s3_bucket.lambda_code_bucket.bucket
+  key    = "layer.zip"
+  source = local.layer_zip
+  depends_on = [null_resource.prepare_layer]
+}
+
+resource "aws_lambda_layer_version" "lambda_layer" {
+  layer_name          = "lambda_layer"
+  compatible_runtimes = ["python3.11"]
+  s3_bucket           = aws_s3_bucket.lambda_code_bucket.bucket 
+  s3_key              = aws_s3_object.layer_zip.key
+  skip_destroy = true
+  depends_on = [aws_s3_object.layer_zip]
+}
\ No newline at end of file
-- 
cgit v1.2.3


From 67a3caf058416718e9413520cb74be049af1e93e Mon Sep 17 00:00:00 2001
From: Alex Schofield <git@ajschof.me>
Date: Fri, 16 Aug 2024 11:09:59 +0100
Subject: docs: add draft key features section

---
 README.md | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9c7baee..0bf6b9d 100644
--- a/README.md
+++ b/README.md
@@ -21,4 +21,17 @@ can:
 1. Extract the data from the `totesys` operational database
 2. Store the data in AWS S3 buckets, that will form our data lake
 3. Transform the data into a suitable schema for the data warehouse
-4. Load the transformed data into the data warehouse hosted on AWS
\ No newline at end of file
+4. Load the transformed data into the data warehouse hosted on AWS
+
+# Key Features
+
+We aim for the project to have certain features. Some are more prioritised than
+others.
+
+- [ ] Automated data ingestion from `totesys` db
+- [ ] Data storage for ingested and processed data in S3 buckets
+- [ ] Data transformation for data warehouse schema
+- [ ] Automated data loading into the data warehouse schema
+- [ ] Logging and monitoring with CloudWatch
+- [ ] Notifications for errors and successful runs (e.g. successful ingestion)
+- [ ] Visualisation of warehouse data
\ No newline at end of file
-- 
cgit v1.2.3


From 24dd35f4bc6a0b8934f09b320f73bc88c6f68f1f Mon Sep 17 00:00:00 2001
From: Ellie <ecsymonds@gmail.com>
Date: Fri, 16 Aug 2024 12:19:54 +0100
Subject: comment out rds.tf to increases tf speed

---
 terraform/rds.tf | 138 +++++++++++++++++++++++++++----------------------------
 1 file changed, 69 insertions(+), 69 deletions(-)

diff --git a/terraform/rds.tf b/terraform/rds.tf
index 88783b7..d1b4959 100644
--- a/terraform/rds.tf
+++ b/terraform/rds.tf
@@ -1,80 +1,80 @@
-data "aws_availability_zones" "available" {}
+# data "aws_availability_zones" "available" {}
 
-module "vpc" {
-  source  = "terraform-aws-modules/vpc/aws"
-  version = "5.12.1"
+# module "vpc" {
+#   source  = "terraform-aws-modules/vpc/aws"
+#   version = "5.12.1"
 
-  name                 = var.project_name
-  cidr                 = "10.0.0.0/16"
-  azs                  = data.aws_availability_zones.available.names
-  public_subnets       = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"]
-  enable_dns_hostnames = true
-  enable_dns_support   = true
-}
+#   name                 = var.project_name
+#   cidr                 = "10.0.0.0/16"
+#   azs                  = data.aws_availability_zones.available.names
+#   public_subnets       = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"]
+#   enable_dns_hostnames = true
+#   enable_dns_support   = true
+# }
 
-resource "aws_db_subnet_group" "Terrific-Totes-sub-gr" {
-  name       = "tt-db-subnet"
-  subnet_ids = module.vpc.public_subnets
+# resource "aws_db_subnet_group" "Terrific-Totes-sub-gr" {
+#   name       = "tt-db-subnet"
+#   subnet_ids = module.vpc.public_subnets
 
-  tags = {
-    Name = "${var.project_name}"
-  }
-}
+#   tags = {
+#     Name = "${var.project_name}"
+#   }
+# }
 
-resource "aws_security_group" "rds" {
-  name   = "${var.project_name}-rds"
-  vpc_id = module.vpc.vpc_id
+# resource "aws_security_group" "rds" {
+#   name   = "${var.project_name}-rds"
+#   vpc_id = module.vpc.vpc_id
 
-  ingress {
-    from_port   = 5432
-    to_port     = 5432
-    protocol    = "tcp"
-    cidr_blocks = ["0.0.0.0/0"]
-  }
+#   ingress {
+#     from_port   = 5432
+#     to_port     = 5432
+#     protocol    = "tcp"
+#     cidr_blocks = ["0.0.0.0/0"]
+#   }
 
-  egress {
-    from_port   = 5432
-    to_port     = 5432
-    protocol    = "tcp"
-    cidr_blocks = ["0.0.0.0/0"]
-  }
+#   egress {
+#     from_port   = 5432
+#     to_port     = 5432
+#     protocol    = "tcp"
+#     cidr_blocks = ["0.0.0.0/0"]
+#   }
 
-  tags = {
-    Name = "${var.project_name}-rds"
-  }
-}
+#   tags = {
+#     Name = "${var.project_name}-rds"
+#   }
+# }
 
-resource "aws_db_parameter_group" "Terrific-Totes-param-gr" {
-  name   = "tt-db-param"
-  family = "postgres14"
+# resource "aws_db_parameter_group" "Terrific-Totes-param-gr" {
+#   name   = "tt-db-param"
+#   family = "postgres14"
 
-  parameter {
-    name  = "log_connections"
-    value = "1"
-  }
-}
+#   parameter {
+#     name  = "log_connections"
+#     value = "1"
+#   }
+# }
 
-resource "aws_db_instance" "terrific-totes-rds" {
-  db_name           = var.project_name
-  instance_class    = "db.t3.micro"
-  allocated_storage = 5
-  engine            = "postgres"
-  engine_version    = "14.10"
-  username          = "totes"
-  password          = "totes123"
-  # username          = "user credentials for the root user" # we could use .env here
-  # password          = "user password for the root user"    # we could use .env here
-  ### alternatively to providing username nad password we can specify:
-  # resource "aws_kms_key" "example_key" {
-  #       description = "Example KMS Key"
-  # }
-  # within the resource:
-  #   manage_master_user_password   = true
-  #   master_user_secret_kms_key_id = aws_kms_key.example.key_id
-  # }
-  db_subnet_group_name   = aws_db_subnet_group.Terrific-Totes-sub-gr.name
-  vpc_security_group_ids = [aws_security_group.rds.id]
-  parameter_group_name   = aws_db_parameter_group.Terrific-Totes-param-gr.name
-  publicly_accessible    = false
-  skip_final_snapshot    = true
-}
+# resource "aws_db_instance" "terrific-totes-rds" {
+#   db_name           = var.project_name
+#   instance_class    = "db.t3.micro"
+#   allocated_storage = 5
+#   engine            = "postgres"
+#   engine_version    = "14.10"
+#   username          = "totes"
+#   password          = "totes123"
+#   # username          = "user credentials for the root user" # we could use .env here
+#   # password          = "user password for the root user"    # we could use .env here
+#   ### alternatively to providing username nad password we can specify:
+#   # resource "aws_kms_key" "example_key" {
+#   #       description = "Example KMS Key"
+#   # }
+#   # within the resource:
+#   #   manage_master_user_password   = true
+#   #   master_user_secret_kms_key_id = aws_kms_key.example.key_id
+#   # }
+#   db_subnet_group_name   = aws_db_subnet_group.Terrific-Totes-sub-gr.name
+#   vpc_security_group_ids = [aws_security_group.rds.id]
+#   parameter_group_name   = aws_db_parameter_group.Terrific-Totes-param-gr.name
+#   publicly_accessible    = false
+#   skip_final_snapshot    = true
+# }
-- 
cgit v1.2.3


From 3d56751d93eeb5ef6cef1f44dd54ee38fcd1fe3c Mon Sep 17 00:00:00 2001
From: lian-manonog <lian.manonog@gmail.com>
Date: Fri, 16 Aug 2024 12:20:54 +0100
Subject: wip: change env line 14 to production

---
 .github/workflows/deploy.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 922daee..bd9df57 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -5,15 +5,17 @@ on:
     branches:
       - ci-cd-branch # Adjust the branch based on our deployment strategy
 
+
 jobs:
   deploy-terraform:
     name: Deploy Terraform
     runs-on: ubuntu-latest
-    environment: test-env
+    #needs: run-checks (must ref on-commit.yml file)
+    environment: production
     steps:
       - name: Checkout Repo
         uses: actions/checkout@v4
-
+      
       - name: Install Terraform
         uses: hashicorp/setup-terraform@v3
 
-- 
cgit v1.2.3


From 9ad481989e7033735815df7c2fe7a277433587a6 Mon Sep 17 00:00:00 2001
From: Alex <git@ajschof.me>
Date: Fri, 16 Aug 2024 12:36:46 +0100
Subject: ci: create .deepsource.toml

for automated commit checking
---
 .deepsource.toml | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 .deepsource.toml

diff --git a/.deepsource.toml b/.deepsource.toml
new file mode 100644
index 0000000..a5002ab
--- /dev/null
+++ b/.deepsource.toml
@@ -0,0 +1,25 @@
+version = 1
+
+[[analyzers]]
+name = "sql"
+
+[[analyzers]]
+name = "terraform"
+
+[[analyzers]]
+name = "python"
+
+  [analyzers.meta]
+  runtime_version = "3.x.x"
+
+[[analyzers]]
+name = "secrets"
+
+[[transformers]]
+name = "black"
+
+[[transformers]]
+name = "autopep8"
+
+[[transformers]]
+name = "ruff"
-- 
cgit v1.2.3


From ba82306f1646215f17b55099fd6342bca2a35c97 Mon Sep 17 00:00:00 2001
From: Alex <git@ajschof.me>
Date: Fri, 16 Aug 2024 12:42:48 +0100
Subject: ci: update .deepsource.toml

---
 .deepsource.toml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.deepsource.toml b/.deepsource.toml
index a5002ab..b435c8b 100644
--- a/.deepsource.toml
+++ b/.deepsource.toml
@@ -17,9 +17,14 @@ name = "secrets"
 
 [[transformers]]
 name = "black"
+enabled = "true"
 
 [[transformers]]
 name = "autopep8"
+enabled = "true"
 
 [[transformers]]
 name = "ruff"
+enabled = "true"
+
+
-- 
cgit v1.2.3


From b3aa2d97e4844b6c205fc9b85427f8d5f150388a Mon Sep 17 00:00:00 2001
From: Alex <git@ajschof.me>
Date: Fri, 16 Aug 2024 12:48:06 +0100
Subject: fix(ci): fix .deepsource.toml config

---
 .deepsource.toml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.deepsource.toml b/.deepsource.toml
index b435c8b..a840b78 100644
--- a/.deepsource.toml
+++ b/.deepsource.toml
@@ -17,14 +17,11 @@ name = "secrets"
 
 [[transformers]]
 name = "black"
-enabled = "true"
 
 [[transformers]]
 name = "autopep8"
-enabled = "true"
 
 [[transformers]]
 name = "ruff"
-enabled = "true"
 
 
-- 
cgit v1.2.3


From a217da60ba75a226bf72a9fc680c4cbabe883aea Mon Sep 17 00:00:00 2001
From: Alex Schofield <git@ajschof.me>
Date: Fri, 16 Aug 2024 12:53:22 +0100
Subject: docs: add empty sections

---
 README.md | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0bf6b9d..6bc75dc 100644
--- a/README.md
+++ b/README.md
@@ -34,4 +34,10 @@ others.
 - [ ] Automated data loading into the data warehouse schema
 - [ ] Logging and monitoring with CloudWatch
 - [ ] Notifications for errors and successful runs (e.g. successful ingestion)
-- [ ] Visualisation of warehouse data
\ No newline at end of file
+- [ ] Visualisation of warehouse data
+
+# Test Coverage
+TBA
+
+# Contributors
+TBA
\ No newline at end of file
-- 
cgit v1.2.3


From dd68d948dec97fedfcaa89806523975ad1224c71 Mon Sep 17 00:00:00 2001
From: Ang Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local>
Date: Fri, 16 Aug 2024 13:48:22 +0100
Subject: refactoring for extract lambda to filter by last updated and if not
 empty write it s3

---
 .gitignore            |  2 ++
 src/extract_lambda.py | 26 +++++++++++---------------
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/.gitignore b/.gitignore
index ca15434..bceab93 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,5 @@ __pycache__/
 
 # OS-Related Files
 .DS_Store
+
+*venv*
diff --git a/src/extract_lambda.py b/src/extract_lambda.py
index f4c0c1d..e348bef 100644
--- a/src/extract_lambda.py
+++ b/src/extract_lambda.py
@@ -136,9 +136,9 @@ def process_and_upload_tables(db, existing_files, client=boto3.client('s3')):
     print(tables)
     for table in tables:
         table_name = table[0]
-        rows = db.run(f"SELECT * FROM {table_name};")
-        
+        rows = db.run(f"SELECT * FROM {table_name} WHERE last_updated >= {datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')};")
 
+    if rows:
         csv_file_path = f"/tmp/{table_name}.csv"
         with open(csv_file_path, "w", newline='') as file:
             writer = csv.writer(file)
@@ -147,16 +147,12 @@ def process_and_upload_tables(db, existing_files, client=boto3.client('s3')):
             writer.writerow(column_names)
             writer.writerows(rows)
         s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv')
-        new_csv_content = open(csv_file_path, "r").read()
-        ## NEW CODE
-        latest_s3_object_key = datetime.strftime(latest_timestamp,f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv')
-        ## END OF NEW CODE
-        if existing_files[latest_s3_object_key] != new_csv_content: 
-            try:
-                client.upload_file(csv_file_path, extract_bucket(), s3_key)
-                logger.info(f"Uploaded {s3_key} to S3.")
-            except ClientError as e:
-                logger.error(f'Error uploading to S3: {e}')
-        else:
-            logger.info(f"No new data.")
-        
\ No newline at end of file
+
+        try:
+            client.upload_file(csv_file_path, extract_bucket(), s3_key)
+            logger.info(f"Uploaded {s3_key} to S3.")
+        except ClientError as e:
+            logger.error(f'Error uploading to S3: {e}')
+    else:
+        logger.info(f"No new data.")
+    
\ No newline at end of file
-- 
cgit v1.2.3


From c284df39ed7735d736f4fe0f2571ba846b8f6315 Mon Sep 17 00:00:00 2001
From: "deepsource-autofix[bot]"
 <62050782+deepsource-autofix[bot]@users.noreply.github.com>
Date: Fri, 16 Aug 2024 12:51:02 +0000
Subject: style: format code with Autopep8, Black and Ruff Formatter

This commit fixes the style issues introduced in dd68d94 according to the output
from Autopep8, Black and Ruff Formatter.

Details: https://github.com/ajschofield/de-project-bentley/pull/47
---
 src/extract_lambda.py | 130 +++++++++++++++++++++++++++-----------------------
 1 file changed, 71 insertions(+), 59 deletions(-)

diff --git a/src/extract_lambda.py b/src/extract_lambda.py
index e348bef..323d04a 100644
--- a/src/extract_lambda.py
+++ b/src/extract_lambda.py
@@ -20,48 +20,49 @@ class DBConnectionException(Exception):
         self.message = str(e)
         super().__init__(self.message)
 
+
 def lambda_handler(event, context):
     """This lambda function connects to the Totesys database, lists the contents of the ingestion bucket,
-       and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them
-       it uses 3 helper functions to achieve these 3 functionalities 
+    and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them
+    it uses 3 helper functions to achieve these 3 functionalities
     """
     try:
         db = connect_to_database()
         existing_files = list_existing_s3_files()
         any_changes = process_and_upload_tables(db, existing_files)
-        
+
         if not any_changes:
             logger.info("No changes detected in the database.")
             return {
-                'statusCode': 200,
-                'body': json.dumps('No changes detected, no CSV files were uploaded.')
+                "statusCode": 200,
+                "body": json.dumps("No changes detected, no CSV files were uploaded."),
             }
         else:
             return {
-                'statusCode': 200,
-                'body': json.dumps('CSV files processed and uploaded successfully.')
+                "statusCode": 200,
+                "body": json.dumps("CSV files processed and uploaded successfully."),
             }
     except Exception as e:
-        logger.error(f'Error: {e}')
-        return {
-            'statusCode': 500,
-            'body': json.dumps('Internal server error.')
-        }
+        logger.error(f"Error: {e}")
+        return {"statusCode": 500, "body": json.dumps("Internal server error.")}
     finally:
         if db:
             db.close()
 
 
-def retrieve_secrets(sm_client=boto3.client('secretsmanager'), secret_name='bentley-secrets'):
+def retrieve_secrets(
+    sm_client=boto3.client("secretsmanager"), secret_name="bentley-secrets"
+):
     try:
         response = sm_client.get_secret_value(SecretId=secret_name)
-        if 'SecretString' in response:
-            secret = json.loads(response['SecretString'])
+        if "SecretString" in response:
+            secret = json.loads(response["SecretString"])
             return secret
     except ClientError as e:
-        logger.error(f'Could not retrieve secrets: {e}')
+        logger.error(f"Could not retrieve secrets: {e}")
         raise e
 
+
 def connect_to_database() -> Connection:
     try:
         secrets = retrieve_secrets()
@@ -72,87 +73,98 @@ def connect_to_database() -> Connection:
         database = secrets["database"]
 
         return Connection(
-            database=database,
-            user=user,
-            password=password,
-            host=host,
-            port=port
+            database=database, user=user, password=password, host=host, port=port
         )
     except InterfaceError as i:
-        logger.error(f'Interface error: {i}')
+        logger.error(f"Interface error: {i}")
         raise DBConnectionException("Failed to connect to database")
 
-def extract_bucket(client=boto3.client('s3')):
+
+def extract_bucket(client=boto3.client("s3")):
     response = client.list_buckets()
-    extract_bucket_filter = [bucket['Name'] for bucket in response['Buckets'] if 'extract' in bucket['Name']]
+    extract_bucket_filter = [
+        bucket["Name"] for bucket in response["Buckets"] if "extract" in bucket["Name"]
+    ]
     return extract_bucket_filter[0]
 
-def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client('s3')):
-    """Creates a dictionary and populates it with the 
-       results of listing the contents of the s3 bucket, then
-       returns the populated dictionary
+
+def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client("s3")):
+    """Creates a dictionary and populates it with the
+    results of listing the contents of the s3 bucket, then
+    returns the populated dictionary
     """
-    
+
     existing_files = {}
-    
+
     try:
         response = client.list_objects_v2(Bucket=bucket_name)
-        
-        if 'Contents' in response:
-            for obj in response['Contents']:
-                s3_key = obj['Key']
+
+        if "Contents" in response:
+            for obj in response["Contents"]:
+                s3_key = obj["Key"]
                 try:
                     file_obj = client.get_object(Bucket=bucket_name, Key=s3_key)
-                    file_content = file_obj['Body'].read().decode('utf-8')
+                    file_content = file_obj["Body"].read().decode("utf-8")
                     existing_files[s3_key] = file_content
                 except ClientError as e:
-                    logger.error(f'Error retrieving S3 object {s3_key}: {e}')
+                    logger.error(f"Error retrieving S3 object {s3_key}: {e}")
         else:
-            logger.error('The bucket is empty')
-    
+            logger.error("The bucket is empty")
+
     except ClientError as e:
-        logger.error(f'Error listing S3 objects: {e}')
-    
-    return existing_files
+        logger.error(f"Error listing S3 objects: {e}")
 
+    return existing_files
 
 
-def process_and_upload_tables(db, existing_files, client=boto3.client('s3')):
-    """Creates a list of the tables from a database query and 
-       then selects everything from each table in individual queries
-       it then writes each table to CSV files and compares with the item 
-       in the existing_files dictionary with the same name. If it finds any changes
-       to files, or new tables/files it uploads them to the s3 bucket
+def process_and_upload_tables(db, existing_files, client=boto3.client("s3")):
+    """Creates a list of the tables from a database query and
+    then selects everything from each table in individual queries
+    it then writes each table to CSV files and compares with the item
+    in the existing_files dictionary with the same name. If it finds any changes
+    to files, or new tables/files it uploads them to the s3 bucket
     """
-    ## NEW CODE
+    # NEW CODE
     all_datetimes = []
     for file_names in existing_files.keys():
-        datetime_str_on_s3 = ''.join(re.search(r'\/(.+/).+_(.+)\.csv',file_names).group(1,2))
-        all_datetimes.append(datetime.strptime(datetime_str_on_s3, '%Y/%m/%d/%H:%M:%S'))
+        datetime_str_on_s3 = "".join(
+            re.search(r"\/(.+/).+_(.+)\.csv", file_names).group(1, 2)
+        )
+        all_datetimes.append(datetime.strptime(datetime_str_on_s3, "%Y/%m/%d/%H:%M:%S"))
     latest_timestamp = max(all_datetimes)
-    ## END OF NEW CODE
+    # END OF NEW CODE
 
-    tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';")
+    tables = db.run(
+        "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';"
+    )
     print(tables)
     for table in tables:
         table_name = table[0]
-        rows = db.run(f"SELECT * FROM {table_name} WHERE last_updated >= {datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')};")
+        rows = db.run(
+            f"SELECT * FROM {table_name} WHERE last_updated >= {datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')};"
+        )
 
     if rows:
         csv_file_path = f"/tmp/{table_name}.csv"
-        with open(csv_file_path, "w", newline='') as file:
+        with open(csv_file_path, "w", newline="") as file:
             writer = csv.writer(file)
-            #column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")]
-            column_names = [col_name[0] for col_name in db.run(f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';")]
+            # column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")]
+            column_names = [
+                col_name[0]
+                for col_name in db.run(
+                    f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';"
+                )
+            ]
             writer.writerow(column_names)
             writer.writerows(rows)
-        s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv')
+        s3_key = datetime.strftime(
+            datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv"
+        )
 
         try:
             client.upload_file(csv_file_path, extract_bucket(), s3_key)
             logger.info(f"Uploaded {s3_key} to S3.")
         except ClientError as e:
-            logger.error(f'Error uploading to S3: {e}')
+            logger.error(f"Error uploading to S3: {e}")
     else:
         logger.info(f"No new data.")
-    
\ No newline at end of file
-- 
cgit v1.2.3


From e97ab6b46f181db107b7a640f386f5f57480347c Mon Sep 17 00:00:00 2001
From: lian-manonog <lian.manonog@gmail.com>
Date: Fri, 16 Aug 2024 14:16:03 +0100
Subject: add makefile in root: not in use currently

---
 Makefile | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 Makefile

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..077cd98
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,80 @@
+##############################################
+#											 #
+#		MAKEFILE TO BUILD THE PROJECT		 #
+#          									 #
+##############################################
+
+PROJECT_NAME 	   = de-project-bentley
+REGION 		 	   = eu-west-2
+PYTHON_INTERPRETER = python 
+WD=$(shell pwd)
+PYTHONPATH=${WD}
+SHELL := /bin/bash
+PROFILE = default
+PIP:=pip
+
+## PYTHON INTERPRETER ENVIRONMENT
+create-environment:
+	@echo ">>> About to create environment: $(PROJECT_NAME)..."
+	@echo ">>> check python3 version"
+	( \
+		$(PYTHON_INTERPRETER) --version; \
+	)
+	@echo ">>> Setting up VirtualEnv."
+	( \
+	    $(PIP) install -q virtualenv virtualenvwrapper; \
+	    virtualenv venv --python=$(PYTHON_INTERPRETER); \
+	)
+
+ACTIVATE_ENV := source venv/bin/activate
+
+# Execute python related functionalities from within the project's environment
+define execute_in_env
+	$(ACTIVATE_ENV) && $1
+endef
+
+## Build the environment requirements
+requirements: create-environment
+	$(call execute_in_env, $(PIP) install -r ./requirements.txt)
+
+# Set Up
+## Install bandit
+bandit:
+	$(call execute_in_env, $(PIP) install bandit)
+
+## Install safety
+safety:
+	$(call execute_in_env, $(PIP) install safety)
+
+## Install black
+black:
+	$(call execute_in_env, $(PIP) install black)
+
+## Install coverage
+coverage:
+	$(call execute_in_env, $(PIP) install coverage)
+
+## Set up dev requirements (bandit, safety, black)
+dev-setup: bandit safety black coverage
+
+# Build / Run
+
+## Run the security test (bandit + safety)
+security-test:
+	$(call execute_in_env, safety check -r ./requirements.txt)
+	$(call execute_in_env, bandit -lll */*.py *c/*/*.py)
+
+## Run the black code check
+run-black:
+	$(call execute_in_env, black  ./src/*/*.py ./test/*/*.py)
+
+## Run the unit tests
+unit-test:
+	$(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest -v)
+
+## Run the coverage check
+check-coverage:
+	$(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest --cov=src test/)
+
+## Run all checks
+run-checks: security-test run-black unit-test check-coverage
-- 
cgit v1.2.3


From 2bcedc300f36760b55f0db8cfb4e724362d1c251 Mon Sep 17 00:00:00 2001
From: Alex Schofield <git@ajschof.me>
Date: Fri, 16 Aug 2024 14:27:41 +0100
Subject: chore(ci): remove redundant on-commit.yml

---
 .github/workflows/on-commit.yml | 50 -----------------------------------------
 1 file changed, 50 deletions(-)
 delete mode 100644 .github/workflows/on-commit.yml

diff --git a/.github/workflows/on-commit.yml b/.github/workflows/on-commit.yml
deleted file mode 100644
index fd9ffb8..0000000
--- a/.github/workflows/on-commit.yml
+++ /dev/null
@@ -1,50 +0,0 @@
-name: commit-qc-checks
-
-on:
-    push:
-        branches-ignore:
-            - 'main'
-
-jobs:
-    python-quality-checks:
-        runs-on: ubuntu-latest
-        steps:
-            - uses : actions/checkout@v4
-            - name : 'Python: Setup'
-              uses : actions/setup-python@v5
-              with:
-                python-version: 3.11
-            - name : 'Python: Install Dependencies'
-              run: |
-                python -m pip install --upgrade pip
-                pip install flake8 pylint black bandit safety
-              continue-on-error: true
-            - name : 'Python: Linting'
-              run: |
-                flake8 .
-                find . -name "*.py" | xargs pylint
-              continue-on-error: true
-            - name : 'Python: Formatting'
-              run: |
-                black --check .
-              continue-on-error: true
-    terraform-quality-checks:
-        runs-on: ubuntu-latest
-        steps:
-            - uses : actions/checkout@v4
-            - name: 'Terraform: Setup'
-              uses: hashicorp/setup-terraform@v3
-              with:
-                terraform_version: latest
-            - name: 'Terraform: Formatting'
-              working-directory: terraform
-              run: terraform fmt -check -recursive
-              continue-on-error: true
-            - name: 'Terraform: Initialise'
-              working-directory: terraform
-              run: terraform init -backend=false
-              continue-on-error: true
-            - name: 'Terraform: Validate'
-              working-directory: terraform
-              run: terraform validate
-              continue-on-error: true
\ No newline at end of file
-- 
cgit v1.2.3


From cf3d366e730e88ceea194d5b3b1d1a3ddecdd944 Mon Sep 17 00:00:00 2001
From: Alex Schofield <git@ajschof.me>
Date: Fri, 16 Aug 2024 14:30:07 +0100
Subject: ci: deploy only on push/pr to main

---
 .github/workflows/deploy.yml | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index bd9df57..db51d20 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -1,10 +1,13 @@
 name: deploy-terraform
 
 on:
-  push:
+  pull-request:
     branches:
-      - ci-cd-branch # Adjust the branch based on our deployment strategy
-
+      - main
+  pull:
+    branches:
+      - main
+      
 
 jobs:
   deploy-terraform:
@@ -36,4 +39,4 @@ jobs:
 
       - name: Terraform Apply
         working-directory: terraform
-        run: terraform apply --auto-approve
\ No newline at end of file
+        run: terraform apply --auto-approve
-- 
cgit v1.2.3


From 63b5f3e5f1888d5653d2f7b3529b3d72e3315dbf Mon Sep 17 00:00:00 2001
From: Alex Schofield <git@ajschof.me>
Date: Fri, 16 Aug 2024 14:43:46 +0100
Subject: fix(ci): amend pull_request syntax

---
 .github/workflows/deploy.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index db51d20..00c7263 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -1,7 +1,7 @@
 name: deploy-terraform
 
 on:
-  pull-request:
+  pull_request:
     branches:
       - main
   pull:
-- 
cgit v1.2.3


From 9cec304b2f8c2832c4a715bba784a34f7c674c19 Mon Sep 17 00:00:00 2001
From: Alex Schofield <git@ajschof.me>
Date: Fri, 16 Aug 2024 14:52:35 +0100
Subject: fix(ci): amend pull to push

---
 .github/workflows/deploy.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 00c7263..5672048 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -4,7 +4,7 @@ on:
   pull_request:
     branches:
       - main
-  pull:
+  push:
     branches:
       - main
       
-- 
cgit v1.2.3


From aba65e0db08625c1ef0d3db6076b54e56e0b45ea Mon Sep 17 00:00:00 2001
From: Ang Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local>
Date: Fri, 16 Aug 2024 14:20:39 +0100
Subject: refactor following github actions major risk message

---
 src/extract_lambda.py | 65 ++++++++++++++++++++++++---------------------------
 1 file changed, 31 insertions(+), 34 deletions(-)

diff --git a/src/extract_lambda.py b/src/extract_lambda.py
index 323d04a..cc09e87 100644
--- a/src/extract_lambda.py
+++ b/src/extract_lambda.py
@@ -30,8 +30,8 @@ def lambda_handler(event, context):
         db = connect_to_database()
         existing_files = list_existing_s3_files()
         any_changes = process_and_upload_tables(db, existing_files)
-
-        if not any_changes:
+        
+        if not any_changes['updated']:
             logger.info("No changes detected in the database.")
             return {
                 "statusCode": 200,
@@ -39,8 +39,9 @@ def lambda_handler(event, context):
             }
         else:
             return {
-                "statusCode": 200,
-                "body": json.dumps("CSV files processed and uploaded successfully."),
+                'statusCode': 200,
+                'body': json.dumps(f"""CSV files processed for {', '.join(any_changes['updated'])} and uploaded successfully.{
+                    'The following tables were not updated: '+', '.join(any_changes['no change']) if any_changes['no change'] else ''}""")
             }
     except Exception as e:
         logger.error(f"Error: {e}")
@@ -124,7 +125,8 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")):
     in the existing_files dictionary with the same name. If it finds any changes
     to files, or new tables/files it uploads them to the s3 bucket
     """
-    # NEW CODE
+    load_status = {'updated':[],'no change':[]}
+    ## Retrieving the latest file timestamp from S3 extract bucket
     all_datetimes = []
     for file_names in existing_files.keys():
         datetime_str_on_s3 = "".join(
@@ -132,39 +134,34 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")):
         )
         all_datetimes.append(datetime.strptime(datetime_str_on_s3, "%Y/%m/%d/%H:%M:%S"))
     latest_timestamp = max(all_datetimes)
-    # END OF NEW CODE
 
-    tables = db.run(
-        "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';"
-    )
-    print(tables)
+    ## Iterating through tables on the database and retrieving only latest changes vs previous file load
+    tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';")
     for table in tables:
         table_name = table[0]
         rows = db.run(
             f"SELECT * FROM {table_name} WHERE last_updated >= {datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')};"
         )
 
-    if rows:
-        csv_file_path = f"/tmp/{table_name}.csv"
-        with open(csv_file_path, "w", newline="") as file:
-            writer = csv.writer(file)
-            # column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")]
-            column_names = [
-                col_name[0]
-                for col_name in db.run(
-                    f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';"
-                )
-            ]
-            writer.writerow(column_names)
-            writer.writerows(rows)
-        s3_key = datetime.strftime(
-            datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv"
-        )
-
-        try:
-            client.upload_file(csv_file_path, extract_bucket(), s3_key)
-            logger.info(f"Uploaded {s3_key} to S3.")
-        except ClientError as e:
-            logger.error(f"Error uploading to S3: {e}")
-    else:
-        logger.info(f"No new data.")
+        ## Creating a temporary file path and writing the column name to it followed by each row of data
+        if rows:
+            csv_file_path = f"/tmp/{table_name}.csv"
+            with open(csv_file_path, "w", newline='') as file:
+                writer = csv.writer(file)
+                #column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")]
+                column_names = [col_name[0] for col_name in db.run(f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';")]
+                writer.writerow(column_names)
+                writer.writerows(rows)
+            s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv')
+
+            ## Writing the new file to S3 extract bucket:
+            try:
+                client.upload_file(csv_file_path, extract_bucket(), s3_key)
+                load_status['updated'].append(table_name)
+                logger.info(f"Uploaded {s3_key} to S3.")
+            except ClientError as e:
+                logger.error(f'Error uploading to S3: {e}')
+        else:
+            load_status['no change'].append(table_name)
+            logger.info(f"No new data in {table_name} name. Latest data retrieved is from {latest_timestamp}.")
+    return load_status 
-- 
cgit v1.2.3


From 4428b8d9e8903e93ca2efd9f95cea9205bf303a9 Mon Sep 17 00:00:00 2001
From: Ang Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local>
Date: Fri, 16 Aug 2024 14:42:15 +0100
Subject: refactoring to be more in line with pythonic code practices and
 prevent sql injection

---
 src/extract_lambda.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/extract_lambda.py b/src/extract_lambda.py
index cc09e87..d1a5c7c 100644
--- a/src/extract_lambda.py
+++ b/src/extract_lambda.py
@@ -1,4 +1,4 @@
-from pg8000.native import Connection, InterfaceError
+from pg8000.native import Connection, InterfaceError, identifier
 import boto3
 import csv
 from botocore.exceptions import ClientError
@@ -136,12 +136,15 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")):
     latest_timestamp = max(all_datetimes)
 
     ## Iterating through tables on the database and retrieving only latest changes vs previous file load
-    tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';")
+    tables = db.run("""
+                    SELECT table_name 
+                    FROM information_schema.tables 
+                    WHERE table_schema='public' AND table_type='BASE TABLE';""")
     for table in tables:
         table_name = table[0]
-        rows = db.run(
-            f"SELECT * FROM {table_name} WHERE last_updated >= {datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')};"
-        )
+        rows = db.run(f"SELECT * FROM {identifier(table_name)} "
+                      "WHERE last_updated >= :latest;",
+                      latest={datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')})
 
         ## Creating a temporary file path and writing the column name to it followed by each row of data
         if rows:
@@ -149,7 +152,9 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")):
             with open(csv_file_path, "w", newline='') as file:
                 writer = csv.writer(file)
                 #column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")]
-                column_names = [col_name[0] for col_name in db.run(f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';")]
+                column_names = [col_name[0] for col_name in 
+                                db.run("""SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS 
+                                       WHERE table_name = :table ;""", table=table_name)]
                 writer.writerow(column_names)
                 writer.writerows(rows)
             s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv')
-- 
cgit v1.2.3


From e153f2072eafca2c83a84e2c4210c46a40dabaf4 Mon Sep 17 00:00:00 2001
From: "deepsource-autofix[bot]"
 <62050782+deepsource-autofix[bot]@users.noreply.github.com>
Date: Fri, 16 Aug 2024 14:36:15 +0000
Subject: style: format code with Autopep8, Black and Ruff Formatter

This commit fixes the style issues introduced in 4428b8d according to the output
from Autopep8, Black and Ruff Formatter.

Details: https://github.com/ajschofield/de-project-bentley/pull/47
---
 src/extract_lambda.py | 66 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 40 insertions(+), 26 deletions(-)

diff --git a/src/extract_lambda.py b/src/extract_lambda.py
index d1a5c7c..9a0e509 100644
--- a/src/extract_lambda.py
+++ b/src/extract_lambda.py
@@ -30,8 +30,8 @@ def lambda_handler(event, context):
         db = connect_to_database()
         existing_files = list_existing_s3_files()
         any_changes = process_and_upload_tables(db, existing_files)
-        
-        if not any_changes['updated']:
+
+        if not any_changes["updated"]:
             logger.info("No changes detected in the database.")
             return {
                 "statusCode": 200,
@@ -39,9 +39,11 @@ def lambda_handler(event, context):
             }
         else:
             return {
-                'statusCode': 200,
-                'body': json.dumps(f"""CSV files processed for {', '.join(any_changes['updated'])} and uploaded successfully.{
-                    'The following tables were not updated: '+', '.join(any_changes['no change']) if any_changes['no change'] else ''}""")
+                "statusCode": 200,
+                "body": json.dumps(
+                    f"""CSV files processed for {', '.join(any_changes['updated'])} and uploaded successfully.{
+                    'The following tables were not updated: '+', '.join(any_changes['no change']) if any_changes['no change'] else ''}"""
+                ),
             }
     except Exception as e:
         logger.error(f"Error: {e}")
@@ -125,8 +127,8 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")):
     in the existing_files dictionary with the same name. If it finds any changes
     to files, or new tables/files it uploads them to the s3 bucket
     """
-    load_status = {'updated':[],'no change':[]}
-    ## Retrieving the latest file timestamp from S3 extract bucket
+    load_status = {"updated": [], "no change": []}
+    # Retrieving the latest file timestamp from S3 extract bucket
     all_datetimes = []
     for file_names in existing_files.keys():
         datetime_str_on_s3 = "".join(
@@ -135,38 +137,50 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")):
         all_datetimes.append(datetime.strptime(datetime_str_on_s3, "%Y/%m/%d/%H:%M:%S"))
     latest_timestamp = max(all_datetimes)
 
-    ## Iterating through tables on the database and retrieving only latest changes vs previous file load
-    tables = db.run("""
+    # Iterating through tables on the database and retrieving only latest changes vs previous file load
+    tables = db.run(
+        """
                     SELECT table_name 
                     FROM information_schema.tables 
-                    WHERE table_schema='public' AND table_type='BASE TABLE';""")
+                    WHERE table_schema='public' AND table_type='BASE TABLE';"""
+    )
     for table in tables:
         table_name = table[0]
-        rows = db.run(f"SELECT * FROM {identifier(table_name)} "
-                      "WHERE last_updated >= :latest;",
-                      latest={datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')})
+        rows = db.run(
+            f"SELECT * FROM {identifier(table_name)} " "WHERE last_updated >= :latest;",
+            latest={datetime.strftime(latest_timestamp, "%H-%m-%d %H:%M:%S")},
+        )
 
-        ## Creating a temporary file path and writing the column name to it followed by each row of data
+        # Creating a temporary file path and writing the column name to it followed by each row of data
         if rows:
             csv_file_path = f"/tmp/{table_name}.csv"
-            with open(csv_file_path, "w", newline='') as file:
+            with open(csv_file_path, "w", newline="") as file:
                 writer = csv.writer(file)
-                #column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")]
-                column_names = [col_name[0] for col_name in 
-                                db.run("""SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS 
-                                       WHERE table_name = :table ;""", table=table_name)]
+                # column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")]
+                column_names = [
+                    col_name[0]
+                    for col_name in db.run(
+                        """SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS 
+                                       WHERE table_name = :table ;""",
+                        table=table_name,
+                    )
+                ]
                 writer.writerow(column_names)
                 writer.writerows(rows)
-            s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv')
+            s3_key = datetime.strftime(
+                datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv"
+            )
 
-            ## Writing the new file to S3 extract bucket:
+            # Writing the new file to S3 extract bucket:
             try:
                 client.upload_file(csv_file_path, extract_bucket(), s3_key)
-                load_status['updated'].append(table_name)
+                load_status["updated"].append(table_name)
                 logger.info(f"Uploaded {s3_key} to S3.")
             except ClientError as e:
-                logger.error(f'Error uploading to S3: {e}')
+                logger.error(f"Error uploading to S3: {e}")
         else:
-            load_status['no change'].append(table_name)
-            logger.info(f"No new data in {table_name} name. Latest data retrieved is from {latest_timestamp}.")
-    return load_status 
+            load_status["no change"].append(table_name)
+            logger.info(
+                f"No new data in {table_name} name. Latest data retrieved is from {latest_timestamp}."
+            )
+    return load_status
-- 
cgit v1.2.3


From 890ca0434ce5f7c9e7bdba1482a86cd63a4ef8f9 Mon Sep 17 00:00:00 2001
From: Ang Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local>
Date: Fri, 16 Aug 2024 15:45:03 +0100
Subject: dummy comment to test checks

---
 src/extract_lambda.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/extract_lambda.py b/src/extract_lambda.py
index 9a0e509..30c7005 100644
--- a/src/extract_lambda.py
+++ b/src/extract_lambda.py
@@ -11,7 +11,7 @@ import re
 logger = logging.getLogger()
 logger.setLevel(logging.INFO)
 
-
+## DB Exception class
 class DBConnectionException(Exception):
     """Wraps pg8000.native Error or DatabaseError."""
 
-- 
cgit v1.2.3


From 653cb35e50b339356274ff03c0d75ac3babf927f Mon Sep 17 00:00:00 2001
From: "deepsource-autofix[bot]"
 <62050782+deepsource-autofix[bot]@users.noreply.github.com>
Date: Fri, 16 Aug 2024 14:45:16 +0000
Subject: style: format code with Autopep8, Black and Ruff Formatter

This commit fixes the style issues introduced in 890ca04 according to the output
from Autopep8, Black and Ruff Formatter.

Details: https://github.com/ajschofield/de-project-bentley/pull/47
---
 src/extract_lambda.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/extract_lambda.py b/src/extract_lambda.py
index 30c7005..4168e27 100644
--- a/src/extract_lambda.py
+++ b/src/extract_lambda.py
@@ -11,7 +11,9 @@ import re
 logger = logging.getLogger()
 logger.setLevel(logging.INFO)
 
-## DB Exception class
+# DB Exception class
+
+
 class DBConnectionException(Exception):
     """Wraps pg8000.native Error or DatabaseError."""
 
-- 
cgit v1.2.3


From 39a33cecb5e19f15bed4a099b02bdba56c80c073 Mon Sep 17 00:00:00 2001
From: HastarTara <joslinrashleigh@gmail.com>
Date: Fri, 16 Aug 2024 16:05:03 +0100
Subject: infra[tf] update lambda extract doesnt work yet

---
 terraform/lambda.tf | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/terraform/lambda.tf b/terraform/lambda.tf
index 658b8c8..71ddd11 100644
--- a/terraform/lambda.tf
+++ b/terraform/lambda.tf
@@ -83,7 +83,7 @@ resource "aws_lambda_function" "load_lambda" {
 }
 
 locals {
-  layer_dir       = "${path.module}/../python"
+  layer_dir       = "${path.module}/.."
   requirements    = "${path.module}/../requirements.txt"
   layer_zip       = "${path.module}/../layer.zip"
 }
@@ -94,9 +94,9 @@ resource "null_resource" "prepare_layer" {
   }
   provisioner "local-exec" {
     command = <<EOT
-      mkdir -p ${local.layer_dir}/lib/python3.8/site-packages/
-      pip install -r ${local.requirements} -t ${local.layer_dir}/lib/python3.11/site-packages/
-      cd ${local.layer_dir} && zip -r ${local.layer_zip} .
+      mkdir -p ${local.layer_dir}/python/lib/python3.11/site-packages/
+      pip install -r ${local.requirements} -t ${local.layer_dir}/python/lib/python3.11/site-packages/
+
     EOT
 }
   }
-- 
cgit v1.2.3


From 7dc483269648e7869df713fa5f946803358cd595 Mon Sep 17 00:00:00 2001
From: Alex Schofield <git@ajschof.me>
Date: Fri, 16 Aug 2024 16:23:56 +0100
Subject: infra(tf): add version constraints for null and archive

---
 terraform/main.tf | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/terraform/main.tf b/terraform/main.tf
index 3b06701..310a251 100644
--- a/terraform/main.tf
+++ b/terraform/main.tf
@@ -4,6 +4,14 @@ terraform {
       source  = "hashicorp/aws"
       version = "~>5.0"
     }
+    null = {
+      source  = "hashicorp/null"
+      version = "~>3.2.2"
+    }
+    archive = {
+      source  = "hashicorp/archive"
+      version = "~>2.5.0"
+    }
   }
   backend "s3" {
     bucket = "bentley-project-secrets"
-- 
cgit v1.2.3


From 303725f83cf5551b3d165aa02ce81562de488a01 Mon Sep 17 00:00:00 2001
From: Alex Schofield <git@ajschof.me>
Date: Fri, 16 Aug 2024 16:24:44 +0100
Subject: infra(tf): re-add code that creates layer zip

---
 terraform/lambda.tf | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/terraform/lambda.tf b/terraform/lambda.tf
index 71ddd11..67fd6eb 100644
--- a/terraform/lambda.tf
+++ b/terraform/lambda.tf
@@ -83,9 +83,9 @@ resource "aws_lambda_function" "load_lambda" {
 }
 
 locals {
-  layer_dir       = "${path.module}/.."
-  requirements    = "${path.module}/../requirements.txt"
-  layer_zip       = "${path.module}/../layer.zip"
+  layer_dir    = "${path.module}/.."
+  requirements = "${path.module}/../requirements.txt"
+  layer_zip    = "${path.module}/../layer.zip"
 }
 
 resource "null_resource" "prepare_layer" {
@@ -96,23 +96,23 @@ resource "null_resource" "prepare_layer" {
     command = <<EOT
       mkdir -p ${local.layer_dir}/python/lib/python3.11/site-packages/
       pip install -r ${local.requirements} -t ${local.layer_dir}/python/lib/python3.11/site-packages/
-
+      cd ${local.layer_dir} && zip -r ${local.layer_zip} .
     EOT
-}
   }
+}
 
 resource "aws_s3_object" "layer_zip" {
-  bucket = aws_s3_bucket.lambda_code_bucket.bucket
-  key    = "layer.zip"
-  source = local.layer_zip
+  bucket     = aws_s3_bucket.lambda_code_bucket.bucket
+  key        = "layer.zip"
+  source     = local.layer_zip
   depends_on = [null_resource.prepare_layer]
 }
 
 resource "aws_lambda_layer_version" "lambda_layer" {
   layer_name          = "lambda_layer"
   compatible_runtimes = ["python3.11"]
-  s3_bucket           = aws_s3_bucket.lambda_code_bucket.bucket 
+  s3_bucket           = aws_s3_bucket.lambda_code_bucket.bucket
   s3_key              = aws_s3_object.layer_zip.key
-  skip_destroy = true
-  depends_on = [aws_s3_object.layer_zip]
-}
\ No newline at end of file
+  skip_destroy        = true
+  depends_on          = [aws_s3_object.layer_zip]
+}
-- 
cgit v1.2.3


From 3e0c59f5571e8f4f4f02048465a1efbed9c9f1c1 Mon Sep 17 00:00:00 2001
From: Alex Schofield <git@ajschof.me>
Date: Fri, 16 Aug 2024 16:28:04 +0100
Subject: chore(tf): remove dummy username/password

sorry hackers!
---
 terraform/rds.tf | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/terraform/rds.tf b/terraform/rds.tf
index d1b4959..a013fb3 100644
--- a/terraform/rds.tf
+++ b/terraform/rds.tf
@@ -60,18 +60,8 @@
 #   allocated_storage = 5
 #   engine            = "postgres"
 #   engine_version    = "14.10"
-#   username          = "totes"
-#   password          = "totes123"
-#   # username          = "user credentials for the root user" # we could use .env here
-#   # password          = "user password for the root user"    # we could use .env here
-#   ### alternatively to providing username nad password we can specify:
-#   # resource "aws_kms_key" "example_key" {
-#   #       description = "Example KMS Key"
-#   # }
-#   # within the resource:
-#   #   manage_master_user_password   = true
-#   #   master_user_secret_kms_key_id = aws_kms_key.example.key_id
-#   # }
+#   username          = ""
+#   password          = ""
 #   db_subnet_group_name   = aws_db_subnet_group.Terrific-Totes-sub-gr.name
 #   vpc_security_group_ids = [aws_security_group.rds.id]
 #   parameter_group_name   = aws_db_parameter_group.Terrific-Totes-param-gr.name
-- 
cgit v1.2.3


From 1e27974ecc48d8611b87af1b9cd51e29afa8c792 Mon Sep 17 00:00:00 2001
From: Alex Schofield <git@ajschof.me>
Date: Fri, 16 Aug 2024 17:15:59 +0100
Subject: test(fx): fix prepare_layer - broken

---
 terraform/lambda.tf | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/terraform/lambda.tf b/terraform/lambda.tf
index 67fd6eb..27e6266 100644
--- a/terraform/lambda.tf
+++ b/terraform/lambda.tf
@@ -89,14 +89,13 @@ locals {
 }
 
 resource "null_resource" "prepare_layer" {
-  triggers = {
-    requirements_hash = filesha1(local.requirements)
-  }
   provisioner "local-exec" {
     command = <<EOT
-      mkdir -p ${local.layer_dir}/python/lib/python3.11/site-packages/
-      pip install -r ${local.requirements} -t ${local.layer_dir}/python/lib/python3.11/site-packages/
-      cd ${local.layer_dir} && zip -r ${local.layer_zip} .
+      cd ${local.layer_dir}
+      rm -rf python
+      mkdir python
+      pip3 install -r ${local.requirements} -t python/
+      zip -r ${local.layer_zip} python/
     EOT
   }
 }
@@ -104,7 +103,7 @@ resource "null_resource" "prepare_layer" {
 resource "aws_s3_object" "layer_zip" {
   bucket     = aws_s3_bucket.lambda_code_bucket.bucket
   key        = "layer.zip"
-  source     = local.layer_zip
+  source     = "${local.layer_dir}/${local.layer_zip}"
   depends_on = [null_resource.prepare_layer]
 }
 
-- 
cgit v1.2.3


From 9dabc89c897f7dc9034e44c277d68e01c7e12ad7 Mon Sep 17 00:00:00 2001
From: Alex Schofield <git@ajschof.me>
Date: Fri, 16 Aug 2024 21:06:51 +0100
Subject: docs: add badges to README

---
 README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/README.md b/README.md
index 6bc75dc..cbb446c 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,13 @@
 # ToteSys - Data Engineering Project
 
+[![Python](https://img.shields.io/badge/Python-FFD43B?style=for-the-badge&logo=python&logoColor=blue)](https://www.python.org/)
+[![AWS](https://img.shields.io/badge/Amazon_AWS-FF9900?style=for-the-badge&logo=amazonaws&logoColor=white)](https://aws.amazon.com/)
+[![Terraform](https://img.shields.io/badge/Terraform-7B42BC?style=for-the-badge&logo=terraform&logoColor=white)](https://www.terraform.io/)
+[![Postgresql](https://img.shields.io/badge/PostgreSQL-316192?style=for-the-badge&logo=postgresql&logoColor=white)](https://www.postgresql.org/)
+[![GitHub Actions](https://img.shields.io/badge/GitHub_Actions-2088FF?style=for-the-badge&logo=github-actions&logoColor=white)](https://github.com/features/actions)
+
+[![Terraform Main Deployment Workflow Status](https://img.shields.io/github/actions/workflow/status/ajschofield/de-project-bentley/deploy.yml?branch=main&style=flat-square&label=deploy)](https://github.com/ajschofield/de-project-bentley/actions/workflows/deploy.yml?query=branch%3Amain)
+[![Production Environment Status](https://img.shields.io/github/deployments/ajschofield/de-project-bentley/production?style=flat-square&label=env)](https://github.com/ajschofield/de-project-bentley/deployments/production)
 # Summary
 The project aims to implement a data platform that can extract data from an
 operational database, archive it in a data lake, and make it easily accessible
-- 
cgit v1.2.3


From 95ad71be4315f5ae3f9183f66049ae8b8cf914fc Mon Sep 17 00:00:00 2001
From: "deepsource-autofix[bot]"
 <62050782+deepsource-autofix[bot]@users.noreply.github.com>
Date: Fri, 16 Aug 2024 20:07:43 +0000
Subject: style: format code with Autopep8, Black and Ruff Formatter

This commit fixes the style issues introduced in 9dabc89 according to the output
from Autopep8, Black and Ruff Formatter.

Details: https://github.com/ajschofield/de-project-bentley/pull/52
---
 src/load_lambda.py            |  2 +-
 src/secrets_manager.py        | 31 +++++++++----------
 src/transform_lambda.py       |  2 +-
 test/test_secrets_manager.py  | 19 +++++++-----
 tests/test_extract_lambda.py  | 69 ++++++++++++++++++++++++-------------------
 tests/test_secrets_manager.py | 37 +++++++++++++++--------
 6 files changed, 93 insertions(+), 67 deletions(-)

diff --git a/src/load_lambda.py b/src/load_lambda.py
index 6ee681f..c6a8e60 100644
--- a/src/load_lambda.py
+++ b/src/load_lambda.py
@@ -1,2 +1,2 @@
 def lambda_handler():
-    pass
\ No newline at end of file
+    pass
diff --git a/src/secrets_manager.py b/src/secrets_manager.py
index c0fb61e..3484688 100644
--- a/src/secrets_manager.py
+++ b/src/secrets_manager.py
@@ -4,45 +4,46 @@ import json
 
 
 def sm_client():
-    sm_client = boto3.client('secretsmanager')
+    sm_client = boto3.client("secretsmanager")
     yield sm_client
 
-def create_secret(sm_client, secret_name, cohort_id, user, password, host, database, port):
+
+def create_secret(
+    sm_client, secret_name, cohort_id, user, password, host, database, port
+):
     secret = {
         "cohort_id": cohort_id,
         "user": user,
         "password": password,
         "host": host,
         "database": database,
-        "port": port
+        "port": port,
     }
 
     response = sm_client.create_secret(
-        Name = secret_name,
-        SecretString = json.dumps(secret)
+        Name=secret_name, SecretString=json.dumps(secret)
     )
 
     print(response)
     return response
 
+
 def list_secret(sm_client):
     response = sm_client.list_secrets()
-    secret_dict = response['SecretList']
+    secret_dict = response["SecretList"]
     secret_names = []
     for items in secret_dict:
-        secret_names.append(items['Name'])
-    print(f'{len(secret_names)} secret(s) available')
+        secret_names.append(items["Name"])
+    print(f"{len(secret_names)} secret(s) available")
     for name in secret_names:
         print(name)
     return secret_names
 
-def retrieve_secrets(sm_client):
-    response = sm_client.get_secrets(
-        
-    )
 
+def retrieve_secrets(sm_client):
+    response = sm_client.get_secrets()
 
 
-#retrieve secret
-#so lambda can access totesy db
-#so lambda connect to the db and then retrieve the data
\ No newline at end of file
+# retrieve secret
+# so lambda can access totesy db
+# so lambda connect to the db and then retrieve the data
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index 6ee681f..c6a8e60 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,2 +1,2 @@
 def lambda_handler():
-    pass
\ No newline at end of file
+    pass
diff --git a/test/test_secrets_manager.py b/test/test_secrets_manager.py
index 86533bc..cb4ec15 100644
--- a/test/test_secrets_manager.py
+++ b/test/test_secrets_manager.py
@@ -2,10 +2,12 @@ from src.secrets_manager import sm_client, create_secret, list_secret
 import boto3
 from moto import mock_aws
 import json
-import pytest 
+import pytest
 import os
 
-pytest.fixture(scope='class')
+pytest.fixture(scope="class")
+
+
 def mock_aws_credentials():
     """Mocked AWS Credentials for moto."""
     os.environ["AWS_ACCESS_KEY_ID"] = "testing"
@@ -14,10 +16,11 @@ def mock_aws_credentials():
     os.environ["AWS_SESSION_TOKEN"] = "testing"
     os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
 
-@pytest.fixture(scope='class')
+
+@pytest.fixture(scope="class")
 def mock_sm_client(mock_aws_credentials):
     with mock_aws():
-        yield boto3.client('secretsmanager')
+        yield boto3.client("secretsmanager")
 
 
 def test_create_secret_stores_secrets(mock_sm_client):
@@ -29,6 +32,8 @@ def test_create_secret_stores_secrets(mock_sm_client):
     port = "test_port"
 
     secret_name = "test_secret"
-    response = create_secret(mock_sm_client, secret_name, cohort_id, user, password, host, database, port)
-    
-    assert response['Name'] == secret_name
\ No newline at end of file
+    response = create_secret(
+        mock_sm_client, secret_name, cohort_id, user, password, host, database, port
+    )
+
+    assert response["Name"] == secret_name
diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py
index e94a8a4..877e36a 100644
--- a/tests/test_extract_lambda.py
+++ b/tests/test_extract_lambda.py
@@ -3,11 +3,17 @@ import boto3
 from moto import mock_aws
 from unittest.mock import patch, MagicMock
 from unittest import TestCase
-from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables
-import os 
+from src.extract_lambda import (
+    list_existing_s3_files,
+    connect_to_database,
+    DBConnectionException,
+    process_and_upload_tables,
+)
+import os
 import logging
 
-@pytest.fixture(scope='class')
+
+@pytest.fixture(scope="class")
 def mock_config():
     env_vars = {
         "host": "abc",
@@ -20,54 +26,55 @@ def mock_config():
         yield mock_config
 
 
-@pytest.fixture(scope='class')
+@pytest.fixture(scope="class")
 def aws_credentials():
-    os.environ["AWS_ACCESS_KEY_ID"] = 'testing'
-    os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing'
-    os.environ["AWS_SECURIT_TOKEN"] = 'testing'
-    os.environ["AWS_SESSION_TOKEN"] = 'testing'
-    os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2'
+    os.environ["AWS_ACCESS_KEY_ID"] = "testing"
+    os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
+    os.environ["AWS_SECURIT_TOKEN"] = "testing"
+    os.environ["AWS_SESSION_TOKEN"] = "testing"
+    os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
+
 
-@pytest.fixture(scope='class')
+@pytest.fixture(scope="class")
 def s3_client(aws_credentials):
     with mock_aws():
-        yield boto3.client('s3')
+        yield boto3.client("s3")
+
 
 class TestListExistingS3Files:
     def test_error_if_no_bucket(self, s3_client, caplog):
-
         logger = logging.getLogger()
-        logger.info('Testing now.')
+        logger.info("Testing now.")
         caplog.set_level(logging.ERROR)
         list_existing_s3_files(client=s3_client)
-        assert 'Error listing S3 objects' in caplog.text
+        assert "Error listing S3 objects" in caplog.text
 
     def test_error_if_bucket_is_empty(self, s3_client, caplog):
-
-        s3_client.create_bucket(Bucket='extract_bucket', 
-                                CreateBucketConfiguration={
-                                    'LocationConstraint': 'eu-west-2'
-                                })
+        s3_client.create_bucket(
+            Bucket="extract_bucket",
+            CreateBucketConfiguration={"LocationConstraint": "eu-west-2"},
+        )
         list_existing_s3_files(client=s3_client)
-        assert 'The bucket is empty' in caplog.text 
+        assert "The bucket is empty" in caplog.text
 
     def test_error_retrieving_object(self, s3_client, caplog):
-        s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt')
-        list_existing_s3_files(bucket_name='test_bucket', client=s3_client)
+        s3_client.upload_file("tests/dummy.txt", "extract_bucket", "dummy.txt")
+        list_existing_s3_files(bucket_name="test_bucket", client=s3_client)
 
-        assert 'Error retrieving S3 object ' in caplog.text
+        assert "Error retrieving S3 object " in caplog.text
 
     def test_retrieves_file_content(self, s3_client, caplog):
         result = list_existing_s3_files(client=s3_client)
 
-        assert list(result.values()) == ['This is a test file.'] 
+        assert list(result.values()) == ["This is a test file."]
+
 
 class TestConnectToDatabase:
     def test_connect_to_database(mock_conn, mock_config):
-        with patch("src.extract_lambda.Connection", autospec=True) as mock_conn:  
+        with patch("src.extract_lambda.Connection", autospec=True) as mock_conn:
             connect_to_database()
             mock_conn.assert_called_with(
-            host="abc", user="def", port="5432", password="password", database="db"
+                host="abc", user="def", port="5432", password="password", database="db"
             )
 
     def test_database_error(self, mock_config):
@@ -76,12 +83,14 @@ class TestConnectToDatabase:
 
     def test_logs_interface_error(self, caplog):
         logger = logging.getLogger()
-        logger.info('Testing now.')
+        logger.info("Testing now.")
         caplog.set_level(logging.ERROR)
         with pytest.raises(DBConnectionException):
             connect_to_database()
-        assert 'Interface error' in caplog.text
-'''
+        assert "Interface error" in caplog.text
+
+
+"""
 class TestProcessAndUploadTables:
     def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog):
         logger = logging.getLogger()
@@ -106,4 +115,4 @@ class TestProcessAndUploadTables:
             s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key)
             process_and_upload_tables(mock_db(), existing_files, client=s3_client)
             assert 'No new data.' in caplog.text
-'''
\ No newline at end of file
+"""
diff --git a/tests/test_secrets_manager.py b/tests/test_secrets_manager.py
index a30be86..609c572 100644
--- a/tests/test_secrets_manager.py
+++ b/tests/test_secrets_manager.py
@@ -3,10 +3,11 @@ import boto3
 import botocore.exceptions
 from moto import mock_aws
 import json
-import pytest 
+import pytest
 import os
 
-@pytest.fixture(scope='function')
+
+@pytest.fixture(scope="function")
 def aws_credentials():
     """Mocked AWS Credentials for moto."""
     os.environ["AWS_ACCESS_KEY_ID"] = "testing"
@@ -15,12 +16,14 @@ def aws_credentials():
     os.environ["AWS_SESSION_TOKEN"] = "testing"
     os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
 
-@pytest.fixture(scope='function')
+
+@pytest.fixture(scope="function")
 def mock_sm_client(aws_credentials):
     with mock_aws():
         yield boto3.client("secretsmanager")
 
-@pytest.fixture(scope='function')
+
+@pytest.fixture(scope="function")
 def mock_store_secret(mock_sm_client):
     secret = {
         "cohort_id": "test_cohort_id",
@@ -28,15 +31,18 @@ def mock_store_secret(mock_sm_client):
         "password": "test_password",
         "host": "test_host",
         "database": "test_database",
-        "port": "test_port"
+        "port": "test_port",
     }
 
     secret_name = "test_secret"
 
-    response = mock_sm_client.create_secret(Name=secret_name, SecretString=json.dumps(secret))
+    response = mock_sm_client.create_secret(
+        Name=secret_name, SecretString=json.dumps(secret)
+    )
 
     return response
 
+
 def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret):
     secret_name = "test_secret"
 
@@ -44,8 +50,10 @@ def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret)
 
     assert isinstance(result, dict)
 
-def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_store_secret):
 
+def test_retrieves_secrets_returns_correct_keys_and_values(
+    mock_sm_client, mock_store_secret
+):
     secret_name = "test_secret"
 
     result = retrieve_secrets(mock_sm_client, secret_name)
@@ -57,17 +65,20 @@ def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_
     assert result["database"] == "test_database"
     assert result["port"] == "test_port"
 
-def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type(mock_sm_client):
-    secret_name = [1, 2, 3]
 
+def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type(
+    mock_sm_client,
+):
+    secret_name = [1, 2, 3]
 
     with pytest.raises(botocore.exceptions.ParamValidationError) as error:
         retrieve_secrets(mock_sm_client, secret_name)
 
 
-def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist(mock_sm_client, mock_store_secret):
-    secret_name = 'test_secret_2'
-
+def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist(
+    mock_sm_client, mock_store_secret
+):
+    secret_name = "test_secret_2"
 
     with pytest.raises(botocore.exceptions.ClientError) as error:
-        retrieve_secrets(mock_sm_client, secret_name)
\ No newline at end of file
+        retrieve_secrets(mock_sm_client, secret_name)
-- 
cgit v1.2.3


From e27c6b48897a48f8462b8a0f40deb0ddaf301b63 Mon Sep 17 00:00:00 2001
From: Ang Bel <anzelikabelotelova@Anzelikas-MacBook-Air.local>
Date: Mon, 19 Aug 2024 11:21:58 +0100
Subject: layers block update, function resources to inlcude attributes:
 layers, correct handler and source_code_hash

---
 terraform/lambda.tf | 70 +++++++++++++++++++++++++++++------------------------
 terraform/s3.tf     |  5 ++++
 2 files changed, 44 insertions(+), 31 deletions(-)

diff --git a/terraform/lambda.tf b/terraform/lambda.tf
index 27e6266..e33bc79 100644
--- a/terraform/lambda.tf
+++ b/terraform/lambda.tf
@@ -12,12 +12,14 @@ resource "aws_s3_object" "extract_lambda_code" {
 }
 
 resource "aws_lambda_function" "extract_lambda" {
-  function_name = var.extract_lambda_name
-  s3_bucket     = aws_s3_bucket.lambda_code_bucket.bucket
-  s3_key        = aws_s3_object.extract_lambda_code.key
-  role          = aws_iam_role.multi_service_role.arn
-  handler       = "extract_lambda.extract"
-  runtime       = "python3.11"
+  function_name    = var.extract_lambda_name
+  s3_bucket        = aws_s3_bucket.lambda_code_bucket.bucket
+  s3_key           = aws_s3_object.extract_lambda_code.key
+  layers           = [aws_lambda_layer_version.lambda_layer.arn]
+  role             = aws_iam_role.multi_service_role.arn
+  handler          = "extract_lambda.lambda_handler"
+  runtime          = "python3.11"
+  source_code_hash = data.archive_file.extract_lambda_zip.output_base64sha256
 
   lifecycle {
     create_before_destroy = true
@@ -40,12 +42,14 @@ resource "aws_s3_object" "transform_lambda_code" {
 }
 
 resource "aws_lambda_function" "transform_lambda" {
-  function_name = var.transform_lambda_name
-  s3_bucket     = aws_s3_bucket.lambda_code_bucket.bucket
-  s3_key        = aws_s3_object.transform_lambda_code.key
-  role          = aws_iam_role.multi_service_role.arn
-  handler       = "transform_lambda.transform"
-  runtime       = "python3.11"
+  function_name    = var.transform_lambda_name
+  s3_bucket        = aws_s3_bucket.lambda_code_bucket.bucket
+  s3_key           = aws_s3_object.transform_lambda_code.key
+  layers           = [aws_lambda_layer_version.lambda_layer.arn]
+  role             = aws_iam_role.multi_service_role.arn
+  handler          = "transform_lambda.lambda_handler"
+  runtime          = "python3.11"
+  source_code_hash = data.archive_file.transform_lambda_zip.output_base64sha256
 
   lifecycle {
     create_before_destroy = true
@@ -68,12 +72,14 @@ resource "aws_s3_object" "load_lambda_code" {
 }
 
 resource "aws_lambda_function" "load_lambda" {
-  function_name = var.load_lambda_name
-  s3_bucket     = aws_s3_bucket.lambda_code_bucket.bucket
-  s3_key        = aws_s3_object.load_lambda_code.key
-  role          = aws_iam_role.multi_service_role.arn
-  handler       = "load_lambda.load"
-  runtime       = "python3.11"
+  function_name    = var.load_lambda_name
+  s3_bucket        = aws_s3_bucket.lambda_code_bucket.bucket
+  s3_key           = aws_s3_object.load_lambda_code.key
+  layers           = [aws_lambda_layer_version.lambda_layer.arn]
+  role             = aws_iam_role.multi_service_role.arn
+  handler          = "load_lambda.lambda_handler"
+  runtime          = "python3.11"
+  source_code_hash = data.archive_file.load_lambda_zip.output_base64sha256
 
   lifecycle {
     create_before_destroy = true
@@ -82,10 +88,12 @@ resource "aws_lambda_function" "load_lambda" {
   depends_on = [aws_s3_object.load_lambda_code]
 }
 
+# Lambda Layer Specification
 locals {
-  layer_dir    = "${path.module}/.."
-  requirements = "${path.module}/../requirements.txt"
-  layer_zip    = "${path.module}/../layer.zip"
+  layer_dir    = "lambda_layer"
+  requirements = "requirements.txt"
+  layer_zip    = "layer.zip"
+  layer_name   = "lambda_layer_dev"
 }
 
 resource "null_resource" "prepare_layer" {
@@ -95,23 +103,23 @@ resource "null_resource" "prepare_layer" {
       rm -rf python
       mkdir python
       pip3 install -r ${local.requirements} -t python/
-      zip -r ${local.layer_zip} python/
-    EOT
-  }
+      zip -r ${local.layer_zip} python 
+    EOT 
+  } #removed / at the end of python in line 99
 }
 
-resource "aws_s3_object" "layer_zip" {
-  bucket     = aws_s3_bucket.lambda_code_bucket.bucket
-  key        = "layer.zip"
+resource "aws_s3_object" "lambda_layer_zip" {
+  bucket     = aws_s3_bucket.lambda_code_bucket.id #bucket instead of id
+  key        = "lambda_layer/${local.layer_name}/${local.layer_zip}"
   source     = "${local.layer_dir}/${local.layer_zip}"
   depends_on = [null_resource.prepare_layer]
 }
 
 resource "aws_lambda_layer_version" "lambda_layer" {
-  layer_name          = "lambda_layer"
+  layer_name          = local.layer_name
   compatible_runtimes = ["python3.11"]
-  s3_bucket           = aws_s3_bucket.lambda_code_bucket.bucket
-  s3_key              = aws_s3_object.layer_zip.key
+  s3_bucket           = aws_s3_bucket.lambda_layer_bucket.id #bucket instead of id
+  s3_key              = aws_s3_object.lambda_layer_zip.key
   skip_destroy        = true
-  depends_on          = [aws_s3_object.layer_zip]
+  depends_on          = [aws_s3_object.lambda_layer_zip]
 }
diff --git a/terraform/s3.tf b/terraform/s3.tf
index d5cdee3..b3a863c 100644
--- a/terraform/s3.tf
+++ b/terraform/s3.tf
@@ -12,3 +12,8 @@ resource "aws_s3_bucket" "transform_bucket" {
 resource "aws_s3_bucket" "lambda_code_bucket" {
   bucket_prefix = "${var.s3_code_bucket_name}-"
 }
+
+### LAMBDA LAYER BUCKET
+resource "aws_s3_bucket" "lambda_layer_bucket" {
+  bucket_prefix = "lambda-layer-dev-"
+}
\ No newline at end of file
-- 
cgit v1.2.3


From 43df5dd9c6bd21f33a7fccbc9b81ad3677637da5 Mon Sep 17 00:00:00 2001
From: "deepsource-autofix[bot]"
 <62050782+deepsource-autofix[bot]@users.noreply.github.com>
Date: Mon, 19 Aug 2024 10:23:19 +0000
Subject: style: format code with Autopep8, Black and Ruff Formatter

This commit fixes the style issues introduced in e27c6b4 according to the output
from Autopep8, Black and Ruff Formatter.

Details: https://github.com/ajschofield/de-project-bentley/pull/55
---
 src/load_lambda.py            |  2 +-
 src/secrets_manager.py        | 31 +++++++++----------
 src/transform_lambda.py       |  2 +-
 test/test_secrets_manager.py  | 19 +++++++-----
 tests/test_extract_lambda.py  | 69 ++++++++++++++++++++++++-------------------
 tests/test_secrets_manager.py | 37 +++++++++++++++--------
 6 files changed, 93 insertions(+), 67 deletions(-)

diff --git a/src/load_lambda.py b/src/load_lambda.py
index 6ee681f..c6a8e60 100644
--- a/src/load_lambda.py
+++ b/src/load_lambda.py
@@ -1,2 +1,2 @@
 def lambda_handler():
-    pass
\ No newline at end of file
+    pass
diff --git a/src/secrets_manager.py b/src/secrets_manager.py
index c0fb61e..3484688 100644
--- a/src/secrets_manager.py
+++ b/src/secrets_manager.py
@@ -4,45 +4,46 @@ import json
 
 
 def sm_client():
-    sm_client = boto3.client('secretsmanager')
+    sm_client = boto3.client("secretsmanager")
     yield sm_client
 
-def create_secret(sm_client, secret_name, cohort_id, user, password, host, database, port):
+
+def create_secret(
+    sm_client, secret_name, cohort_id, user, password, host, database, port
+):
     secret = {
         "cohort_id": cohort_id,
         "user": user,
         "password": password,
         "host": host,
         "database": database,
-        "port": port
+        "port": port,
     }
 
     response = sm_client.create_secret(
-        Name = secret_name,
-        SecretString = json.dumps(secret)
+        Name=secret_name, SecretString=json.dumps(secret)
     )
 
     print(response)
     return response
 
+
 def list_secret(sm_client):
     response = sm_client.list_secrets()
-    secret_dict = response['SecretList']
+    secret_dict = response["SecretList"]
     secret_names = []
     for items in secret_dict:
-        secret_names.append(items['Name'])
-    print(f'{len(secret_names)} secret(s) available')
+        secret_names.append(items["Name"])
+    print(f"{len(secret_names)} secret(s) available")
     for name in secret_names:
         print(name)
     return secret_names
 
-def retrieve_secrets(sm_client):
-    response = sm_client.get_secrets(
-        
-    )
 
+def retrieve_secrets(sm_client):
+    response = sm_client.get_secrets()
 
 
-#retrieve secret
-#so lambda can access totesy db
-#so lambda connect to the db and then retrieve the data
\ No newline at end of file
+# retrieve secret
+# so lambda can access totesy db
+# so lambda connect to the db and then retrieve the data
diff --git a/src/transform_lambda.py b/src/transform_lambda.py
index 6ee681f..c6a8e60 100644
--- a/src/transform_lambda.py
+++ b/src/transform_lambda.py
@@ -1,2 +1,2 @@
 def lambda_handler():
-    pass
\ No newline at end of file
+    pass
diff --git a/test/test_secrets_manager.py b/test/test_secrets_manager.py
index 86533bc..cb4ec15 100644
--- a/test/test_secrets_manager.py
+++ b/test/test_secrets_manager.py
@@ -2,10 +2,12 @@ from src.secrets_manager import sm_client, create_secret, list_secret
 import boto3
 from moto import mock_aws
 import json
-import pytest 
+import pytest
 import os
 
-pytest.fixture(scope='class')
+pytest.fixture(scope="class")
+
+
 def mock_aws_credentials():
     """Mocked AWS Credentials for moto."""
     os.environ["AWS_ACCESS_KEY_ID"] = "testing"
@@ -14,10 +16,11 @@ def mock_aws_credentials():
     os.environ["AWS_SESSION_TOKEN"] = "testing"
     os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
 
-@pytest.fixture(scope='class')
+
+@pytest.fixture(scope="class")
 def mock_sm_client(mock_aws_credentials):
     with mock_aws():
-        yield boto3.client('secretsmanager')
+        yield boto3.client("secretsmanager")
 
 
 def test_create_secret_stores_secrets(mock_sm_client):
@@ -29,6 +32,8 @@ def test_create_secret_stores_secrets(mock_sm_client):
     port = "test_port"
 
     secret_name = "test_secret"
-    response = create_secret(mock_sm_client, secret_name, cohort_id, user, password, host, database, port)
-    
-    assert response['Name'] == secret_name
\ No newline at end of file
+    response = create_secret(
+        mock_sm_client, secret_name, cohort_id, user, password, host, database, port
+    )
+
+    assert response["Name"] == secret_name
diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py
index e94a8a4..877e36a 100644
--- a/tests/test_extract_lambda.py
+++ b/tests/test_extract_lambda.py
@@ -3,11 +3,17 @@ import boto3
 from moto import mock_aws
 from unittest.mock import patch, MagicMock
 from unittest import TestCase
-from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables
-import os 
+from src.extract_lambda import (
+    list_existing_s3_files,
+    connect_to_database,
+    DBConnectionException,
+    process_and_upload_tables,
+)
+import os
 import logging
 
-@pytest.fixture(scope='class')
+
+@pytest.fixture(scope="class")
 def mock_config():
     env_vars = {
         "host": "abc",
@@ -20,54 +26,55 @@ def mock_config():
         yield mock_config
 
 
-@pytest.fixture(scope='class')
+@pytest.fixture(scope="class")
 def aws_credentials():
-    os.environ["AWS_ACCESS_KEY_ID"] = 'testing'
-    os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing'
-    os.environ["AWS_SECURIT_TOKEN"] = 'testing'
-    os.environ["AWS_SESSION_TOKEN"] = 'testing'
-    os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2'
+    os.environ["AWS_ACCESS_KEY_ID"] = "testing"
+    os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
+    os.environ["AWS_SECURIT_TOKEN"] = "testing"
+    os.environ["AWS_SESSION_TOKEN"] = "testing"
+    os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
+
 
-@pytest.fixture(scope='class')
+@pytest.fixture(scope="class")
 def s3_client(aws_credentials):
     with mock_aws():
-        yield boto3.client('s3')
+        yield boto3.client("s3")
+
 
 class TestListExistingS3Files:
     def test_error_if_no_bucket(self, s3_client, caplog):
-
         logger = logging.getLogger()
-        logger.info('Testing now.')
+        logger.info("Testing now.")
         caplog.set_level(logging.ERROR)
         list_existing_s3_files(client=s3_client)
-        assert 'Error listing S3 objects' in caplog.text
+        assert "Error listing S3 objects" in caplog.text
 
     def test_error_if_bucket_is_empty(self, s3_client, caplog):
-
-        s3_client.create_bucket(Bucket='extract_bucket', 
-                                CreateBucketConfiguration={
-                                    'LocationConstraint': 'eu-west-2'
-                                })
+        s3_client.create_bucket(
+            Bucket="extract_bucket",
+            CreateBucketConfiguration={"LocationConstraint": "eu-west-2"},
+        )
         list_existing_s3_files(client=s3_client)
-        assert 'The bucket is empty' in caplog.text 
+        assert "The bucket is empty" in caplog.text
 
     def test_error_retrieving_object(self, s3_client, caplog):
-        s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt')
-        list_existing_s3_files(bucket_name='test_bucket', client=s3_client)
+        s3_client.upload_file("tests/dummy.txt", "extract_bucket", "dummy.txt")
+        list_existing_s3_files(bucket_name="test_bucket", client=s3_client)
 
-        assert 'Error retrieving S3 object ' in caplog.text
+        assert "Error retrieving S3 object " in caplog.text
 
     def test_retrieves_file_content(self, s3_client, caplog):
         result = list_existing_s3_files(client=s3_client)
 
-        assert list(result.values()) == ['This is a test file.'] 
+        assert list(result.values()) == ["This is a test file."]
+
 
 class TestConnectToDatabase:
     def test_connect_to_database(mock_conn, mock_config):
-        with patch("src.extract_lambda.Connection", autospec=True) as mock_conn:  
+        with patch("src.extract_lambda.Connection", autospec=True) as mock_conn:
             connect_to_database()
             mock_conn.assert_called_with(
-            host="abc", user="def", port="5432", password="password", database="db"
+                host="abc", user="def", port="5432", password="password", database="db"
             )
 
     def test_database_error(self, mock_config):
@@ -76,12 +83,14 @@ class TestConnectToDatabase:
 
     def test_logs_interface_error(self, caplog):
         logger = logging.getLogger()
-        logger.info('Testing now.')
+        logger.info("Testing now.")
         caplog.set_level(logging.ERROR)
         with pytest.raises(DBConnectionException):
             connect_to_database()
-        assert 'Interface error' in caplog.text
-'''
+        assert "Interface error" in caplog.text
+
+
+"""
 class TestProcessAndUploadTables:
     def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog):
         logger = logging.getLogger()
@@ -106,4 +115,4 @@ class TestProcessAndUploadTables:
             s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key)
             process_and_upload_tables(mock_db(), existing_files, client=s3_client)
             assert 'No new data.' in caplog.text
-'''
\ No newline at end of file
+"""
diff --git a/tests/test_secrets_manager.py b/tests/test_secrets_manager.py
index a30be86..609c572 100644
--- a/tests/test_secrets_manager.py
+++ b/tests/test_secrets_manager.py
@@ -3,10 +3,11 @@ import boto3
 import botocore.exceptions
 from moto import mock_aws
 import json
-import pytest 
+import pytest
 import os
 
-@pytest.fixture(scope='function')
+
+@pytest.fixture(scope="function")
 def aws_credentials():
     """Mocked AWS Credentials for moto."""
     os.environ["AWS_ACCESS_KEY_ID"] = "testing"
@@ -15,12 +16,14 @@ def aws_credentials():
     os.environ["AWS_SESSION_TOKEN"] = "testing"
     os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
 
-@pytest.fixture(scope='function')
+
+@pytest.fixture(scope="function")
 def mock_sm_client(aws_credentials):
     with mock_aws():
         yield boto3.client("secretsmanager")
 
-@pytest.fixture(scope='function')
+
+@pytest.fixture(scope="function")
 def mock_store_secret(mock_sm_client):
     secret = {
         "cohort_id": "test_cohort_id",
@@ -28,15 +31,18 @@ def mock_store_secret(mock_sm_client):
         "password": "test_password",
         "host": "test_host",
         "database": "test_database",
-        "port": "test_port"
+        "port": "test_port",
     }
 
     secret_name = "test_secret"
 
-    response = mock_sm_client.create_secret(Name=secret_name, SecretString=json.dumps(secret))
+    response = mock_sm_client.create_secret(
+        Name=secret_name, SecretString=json.dumps(secret)
+    )
 
     return response
 
+
 def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret):
     secret_name = "test_secret"
 
@@ -44,8 +50,10 @@ def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret)
 
     assert isinstance(result, dict)
 
-def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_store_secret):
 
+def test_retrieves_secrets_returns_correct_keys_and_values(
+    mock_sm_client, mock_store_secret
+):
     secret_name = "test_secret"
 
     result = retrieve_secrets(mock_sm_client, secret_name)
@@ -57,17 +65,20 @@ def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_
     assert result["database"] == "test_database"
     assert result["port"] == "test_port"
 
-def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type(mock_sm_client):
-    secret_name = [1, 2, 3]
 
+def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type(
+    mock_sm_client,
+):
+    secret_name = [1, 2, 3]
 
     with pytest.raises(botocore.exceptions.ParamValidationError) as error:
         retrieve_secrets(mock_sm_client, secret_name)
 
 
-def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist(mock_sm_client, mock_store_secret):
-    secret_name = 'test_secret_2'
-
+def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist(
+    mock_sm_client, mock_store_secret
+):
+    secret_name = "test_secret_2"
 
     with pytest.raises(botocore.exceptions.ClientError) as error:
-        retrieve_secrets(mock_sm_client, secret_name)
\ No newline at end of file
+        retrieve_secrets(mock_sm_client, secret_name)
-- 
cgit v1.2.3


From 1ea59ed0d92d5bbbd1ffe46ca7a1e296aa55fb1f Mon Sep 17 00:00:00 2001
From: T-Aji <tolujbd2@gmail.com>
Date: Mon, 19 Aug 2024 11:29:45 +0100
Subject: all tests added

---
 tests/test_extract_lambda.py | 155 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 125 insertions(+), 30 deletions(-)

diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py
index e94a8a4..67cb6d3 100644
--- a/tests/test_extract_lambda.py
+++ b/tests/test_extract_lambda.py
@@ -3,11 +3,19 @@ import boto3
 from moto import mock_aws
 from unittest.mock import patch, MagicMock
 from unittest import TestCase
-from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables
-import os 
+from src.extract_lambda import (
+    list_existing_s3_files,
+    connect_to_database,
+    DBConnectionException,
+    lambda_handler,
+    process_and_upload_tables,
+)
+import os
 import logging
+import json
 
-@pytest.fixture(scope='class')
+
+@pytest.fixture(scope="class")
 def mock_config():
     env_vars = {
         "host": "abc",
@@ -20,54 +28,139 @@ def mock_config():
         yield mock_config
 
 
-@pytest.fixture(scope='class')
+@pytest.fixture(scope="class")
 def aws_credentials():
-    os.environ["AWS_ACCESS_KEY_ID"] = 'testing'
-    os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing'
-    os.environ["AWS_SECURIT_TOKEN"] = 'testing'
-    os.environ["AWS_SESSION_TOKEN"] = 'testing'
-    os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2'
+    os.environ["AWS_ACCESS_KEY_ID"] = "testing"
+    os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
+    os.environ["AWS_SECURIT_TOKEN"] = "testing"
+    os.environ["AWS_SESSION_TOKEN"] = "testing"
+    os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
+
 
-@pytest.fixture(scope='class')
+@pytest.fixture(scope="class")
 def s3_client(aws_credentials):
     with mock_aws():
-        yield boto3.client('s3')
+        yield boto3.client("s3")
+
+
+class TestLambdaHandler:
+    def test_lambda_handler_files_processed_and_uploaded_successfully(self, mocker):
+        mock_db = MagicMock()
+        mock_db.run.side_effect = [
+            [["Fruits"]],
+            [["Vegetable", "Sour", "Green"], ["Berry", "Sweet", "Red"]],
+            [["Food_type"], ["Flavour"], ["Colour"]],
+        ]
+        mock_db.columns.return_value = [
+            {"name": "Food_type"},
+            {"name": "Flavour"},
+            {"name": "Colour"},
+        ]
+        with patch("src.extract_lambda.connect_to_database", return_value=mock_db):
+            mock_process_and_upload_tables = mocker.patch(
+                "src.extract_lambda.process_and_upload_tables", return_value=mock_db
+            )
+            mock_list_existing_s3_files = mocker.patch(
+                "src.extract_lambda.list_existing_s3_files", return_value={}
+            )
+            event = {}
+            context = {}
+            response = lambda_handler(event, context)
+            assert response["statusCode"] == 200
+            assert (
+                json.loads(response["body"])
+                == "CSV files processed and uploaded successfully."
+            )
+            mock_list_existing_s3_files.assert_called_once()
+            mock_process_and_upload_tables.assert_called_once_with(mock_db, {})
+            mock_db.close.assert_called_once()
+
+    def test_lambda_handler_no_changes_detected_no_files_uploaded(self, mocker):
+        mock_db = MagicMock()
+        mock_db.run.side_effect = [
+            [["Fruits"]],
+            [["Vegetable", "Sour", "Green"], ["Berry", "Sweet", "Red"]],
+            [["Food_type"], ["Flavour"], ["Colour"]],
+        ]
+        mock_db.columns.return_value = [
+            {"name": "Food_type"},
+            {"name": "Flavour"},
+            {"name": "Colour"},
+        ]
+
+        with patch("src.extract_lambda.connect_to_database", return_value=mock_db):
+            mock_process_and_upload_tables = mocker.patch(
+                "src.extract_lambda.process_and_upload_tables", return_value=False
+            )
+            mock_list_existing_s3_files = mocker.patch(
+                "src.extract_lambda.list_existing_s3_files", return_value={}
+            )
+            event = {}
+            context = {}
+            response = lambda_handler(event, context)
+            assert response["statusCode"] == 200
+            assert (
+                json.loads(response["body"])
+                == "No changes detected, no CSV files were uploaded."
+            )
+            mock_list_existing_s3_files.assert_called_once()
+            mock_process_and_upload_tables.assert_called_once_with(mock_db, {})
+            mock_db.close.assert_called_once()
+
+    def test_lambda_handler_exception_error(self, mocker):
+        with patch(
+            "src.extract_lambda.connect_to_database",
+            side_effect=Exception("Database connection error"),
+        ):
+            mock_process_and_upload_tables = mocker.patch(
+                "src.extract_lambda.process_and_upload_tables"
+            )
+            mock_list_existing_s3_files = mocker.patch(
+                "src.extract_lambda.list_existing_s3_files"
+            )
+            event = {}
+            context = {}
+            response = lambda_handler(event, context)
+            assert response["statusCode"] == 500
+            assert json.loads(response["body"]) == "Internal server error."
+            mock_list_existing_s3_files.assert_not_called()
+            mock_process_and_upload_tables.assert_not_called()
+
 
 class TestListExistingS3Files:
     def test_error_if_no_bucket(self, s3_client, caplog):
-
         logger = logging.getLogger()
-        logger.info('Testing now.')
+        logger.info("Testing now.")
         caplog.set_level(logging.ERROR)
         list_existing_s3_files(client=s3_client)
-        assert 'Error listing S3 objects' in caplog.text
+        assert "Error listing S3 objects" in caplog.text
 
     def test_error_if_bucket_is_empty(self, s3_client, caplog):
-
-        s3_client.create_bucket(Bucket='extract_bucket', 
-                                CreateBucketConfiguration={
-                                    'LocationConstraint': 'eu-west-2'
-                                })
+        s3_client.create_bucket(
+            Bucket="extract_bucket",
+            CreateBucketConfiguration={"LocationConstraint": "eu-west-2"},
+        )
         list_existing_s3_files(client=s3_client)
-        assert 'The bucket is empty' in caplog.text 
+        assert "The bucket is empty" in caplog.text
 
     def test_error_retrieving_object(self, s3_client, caplog):
-        s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt')
-        list_existing_s3_files(bucket_name='test_bucket', client=s3_client)
+        s3_client.upload_file("tests/dummy.txt", "extract_bucket", "dummy.txt")
+        list_existing_s3_files(bucket_name="test_bucket", client=s3_client)
 
-        assert 'Error retrieving S3 object ' in caplog.text
+        assert "Error retrieving S3 object " in caplog.text
 
     def test_retrieves_file_content(self, s3_client, caplog):
         result = list_existing_s3_files(client=s3_client)
 
-        assert list(result.values()) == ['This is a test file.'] 
+        assert list(result.values()) == ["This is a test file."]
+
 
 class TestConnectToDatabase:
     def test_connect_to_database(mock_conn, mock_config):
-        with patch("src.extract_lambda.Connection", autospec=True) as mock_conn:  
+        with patch("src.extract_lambda.Connection", autospec=True) as mock_conn:
             connect_to_database()
             mock_conn.assert_called_with(
-            host="abc", user="def", port="5432", password="password", database="db"
+                host="abc", user="def", port="5432", password="password", database="db"
             )
 
     def test_database_error(self, mock_config):
@@ -76,12 +169,14 @@ class TestConnectToDatabase:
 
     def test_logs_interface_error(self, caplog):
         logger = logging.getLogger()
-        logger.info('Testing now.')
+        logger.info("Testing now.")
         caplog.set_level(logging.ERROR)
         with pytest.raises(DBConnectionException):
             connect_to_database()
-        assert 'Interface error' in caplog.text
-'''
+        assert "Interface error" in caplog.text
+
+
+"""
 class TestProcessAndUploadTables:
     def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog):
         logger = logging.getLogger()
@@ -106,4 +201,4 @@ class TestProcessAndUploadTables:
             s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key)
             process_and_upload_tables(mock_db(), existing_files, client=s3_client)
             assert 'No new data.' in caplog.text
-'''
\ No newline at end of file
+"""
-- 
cgit v1.2.3


From 24a4573d6cf64ec0383ae16bfba09a0ffdb8c129 Mon Sep 17 00:00:00 2001
From: T-Aji <tolujbd2@gmail.com>
Date: Mon, 19 Aug 2024 11:49:08 +0100
Subject: update .gitignore

---
 .gitignore | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index bceab93..6aa03fc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,5 +14,4 @@ __pycache__/
 
 # OS-Related Files
 .DS_Store
-
-*venv*
+venv
\ No newline at end of file
-- 
cgit v1.2.3


From 444bb270fc8f758f33b0477c992b6a8e873bcd89 Mon Sep 17 00:00:00 2001
From: "deepsource-autofix[bot]"
 <62050782+deepsource-autofix[bot]@users.noreply.github.com>
Date: Mon, 19 Aug 2024 11:06:02 +0000
Subject: style: format code with Autopep8, Black and Ruff Formatter

This commit fixes the style issues introduced in 0eff70f according to the output
from Autopep8, Black and Ruff Formatter.

Details: https://github.com/ajschofield/de-project-bentley/pull/59
---
 tests/test_extract_lambda.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py
index fc68a4a..7707cbf 100644
--- a/tests/test_extract_lambda.py
+++ b/tests/test_extract_lambda.py
@@ -42,6 +42,7 @@ def s3_client(aws_credentials):
     with mock_aws():
         yield boto3.client("s3")
 
+
 class TestLambdaHandler:
     def test_lambda_handler_files_processed_and_uploaded_successfully(self, mocker):
         mock_db = MagicMock()
@@ -125,6 +126,7 @@ class TestLambdaHandler:
             mock_list_existing_s3_files.assert_not_called()
             mock_process_and_upload_tables.assert_not_called()
 
+
 class TestListExistingS3Files:
     def test_error_if_no_bucket(self, s3_client, caplog):
         logger = logging.getLogger()
-- 
cgit v1.2.3