aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlex <git@ajschof.me>2024-08-15 19:51:49 +0100
committerGitHub <noreply@github.com>2024-08-15 19:51:49 +0100
commit0358d491ace5859fe5140e439ec4f1fd80a67bb0 (patch)
treea690741aac60485522a578b655e7b32418172f44
parent733cb2eae5ab77c71c24747b5652ce135bb4efe4 (diff)
parent83a05489bd5ec56f51ede83f52f8babcaa7eef70 (diff)
downloadde-project-bentley-0358d491ace5859fe5140e439ec4f1fd80a67bb0.tar.gz
de-project-bentley-0358d491ace5859fe5140e439ec4f1fd80a67bb0.zip
Merge branch 'development' into chore/gitignore
-rw-r--r--.github/workflows/deploy.yml37
-rw-r--r--.github/workflows/on-commit.yml50
-rw-r--r--.github/workflows/python.yml50
-rw-r--r--.github/workflows/terraform.yml37
-rw-r--r--.gitignore1
-rw-r--r--requirements.txt30
-rw-r--r--src/extract_lambda.py166
-rw-r--r--src/secrets_manager.py48
-rw-r--r--terraform/rds.tf42
-rw-r--r--terraform/vars.tf4
-rw-r--r--test.py0
-rw-r--r--test/test_secrets_manager.py34
-rw-r--r--tests/dummy.txt1
-rw-r--r--tests/dummy_identical.csv4
-rw-r--r--tests/test_extract_lambda.py109
15 files changed, 483 insertions, 130 deletions
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
new file mode 100644
index 0000000..372d0b3
--- /dev/null
+++ b/.github/workflows/deploy.yml
@@ -0,0 +1,37 @@
+name: deploy-terraform
+
+on:
+ push:
+ branches:
+ - test-ci/** # Adjust the branch based on our deployment strategy
+
+jobs:
+ deploy-terraform:
+ name: Deploy Terraform
+ runs-on: ubuntu-latest
+ environment: test-env
+ steps:
+ - name: Checkout Repo
+ uses: actions/checkout@v4
+
+ - name: Install Terraform
+ uses: hashicorp/setup-terraform@v3
+
+ - name: Configure AWS Credentials
+ uses: aws-actions/configure-aws-credentials@v4
+ with:
+ aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+ aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+ aws-region: ${{ secrets.AWS_REGION }}
+
+ - name: Terraform Init
+ working-directory: terraform
+ run: terraform init
+
+ - name: Terraform Plan
+ working-directory: terraform
+ run: terraform plan
+
+ - name: Terraform Apply
+ working-directory: terraform
+ run: terraform apply --auto-approve \ No newline at end of file
diff --git a/.github/workflows/on-commit.yml b/.github/workflows/on-commit.yml
new file mode 100644
index 0000000..fd9ffb8
--- /dev/null
+++ b/.github/workflows/on-commit.yml
@@ -0,0 +1,50 @@
+name: commit-qc-checks
+
+on:
+ push:
+ branches-ignore:
+ - 'main'
+
+jobs:
+ python-quality-checks:
+ runs-on: ubuntu-latest
+ steps:
+ - uses : actions/checkout@v4
+ - name : 'Python: Setup'
+ uses : actions/setup-python@v5
+ with:
+ python-version: 3.11
+ - name : 'Python: Install Dependencies'
+ run: |
+ python -m pip install --upgrade pip
+ pip install flake8 pylint black bandit safety
+ continue-on-error: true
+ - name : 'Python: Linting'
+ run: |
+ flake8 .
+ find . -name "*.py" | xargs pylint
+ continue-on-error: true
+ - name : 'Python: Formatting'
+ run: |
+ black --check .
+ continue-on-error: true
+ terraform-quality-checks:
+ runs-on: ubuntu-latest
+ steps:
+ - uses : actions/checkout@v4
+ - name: 'Terraform: Setup'
+ uses: hashicorp/setup-terraform@v3
+ with:
+ terraform_version: latest
+ - name: 'Terraform: Formatting'
+ working-directory: terraform
+ run: terraform fmt -check -recursive
+ continue-on-error: true
+ - name: 'Terraform: Initialise'
+ working-directory: terraform
+ run: terraform init -backend=false
+ continue-on-error: true
+ - name: 'Terraform: Validate'
+ working-directory: terraform
+ run: terraform validate
+ continue-on-error: true \ No newline at end of file
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
deleted file mode 100644
index 7d5b5b1..0000000
--- a/.github/workflows/python.yml
+++ /dev/null
@@ -1,50 +0,0 @@
-name: python-quality-checks
-
-on:
- push:
- branches: [development]
- pull_request:
- branches: [development, staging]
-
-jobs:
-
- check-if-py-files-exist:
- runs-on: ubuntu-latest
- outputs:
- py_files_exist: ${{ steps.check.outputs.py_files_exist }}
- steps:
- - uses: actions/checkout@v2
- - id: check_files
- run: |
- if [ -n "$(find . -name '*.py')" ]; then
- echo "::set-output name=py_files_exist::true"
- else
- echo "::set-output name=py_files_exist::false"
- fi
-
- quality-checks:
- needs: check-if-py-files-exist
- if: ${{ needs.check-if-py-files-exist.outputs.py_files_exist == 'true' }}
- runs-on: ubuntu-latest
- steps:
- - uses : actions/checkout@v2
- - name : Setup
- uses : actions/setup-python@v2
- with:
- python-version: 3.11
- - name : Dependencies
- run: |
- python -m pip install --upgrade pip
- pip install flake8 pylint black bandit safety
- - name : Linting
- run: |
- flake8 .
- find . -name "*.py" | xargs pylint
- - name : Formatting
- run: |
- black --check .
- - name: Security
- run: |
- bandit -r .
- safety check
- \ No newline at end of file
diff --git a/.github/workflows/terraform.yml b/.github/workflows/terraform.yml
deleted file mode 100644
index c349756..0000000
--- a/.github/workflows/terraform.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-name: terraform-quality-checks
-
-on:
- push:
- branches: [development]
- paths:
- - 'terraform/**.tf'
- - 'terraform/**.tfvars'
- pull_request:
- branches: [development, staging]
- paths:
- - 'terraform/**.tf'
- - 'terraform/**.tfvars'
-jobs:
- terraform-validation:
- runs-on: ubuntu-latest
- defaults:
- run:
- working-directory: ./terraform
- steps:
- - uses: actions/checkout@v2
- - name: Setup Terraform
- uses: hashicorp/setup-terraform@v1
- with:
- terraform_version: latest # Using the latest version, but not sure if it's the best practice
- - name: Format
- run: terraform fmt -check -recursive
- - name: Init
- run: terraform init -backend=false
- - name: Validate
- run: terraform validate
- - name: Setup TFLint
- uses: terraform-linters/setup-tflint@v2
- with:
- tflint_version: latest
- - name: Run TFLint
- run: tflint -f compact \ No newline at end of file
diff --git a/.gitignore b/.gitignore
index cd44594..ca15434 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,7 @@
# Output Files
*.zip
log*
+__pycache__/
# OS-Related Files
.DS_Store
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..6f383f9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,30 @@
+asn1crypto==1.5.1
+boto3==1.34.159
+botocore==1.34.159
+certifi==2024.7.4
+cffi==1.17.0
+charset-normalizer==3.3.2
+cryptography==43.0.0
+idna==3.7
+iniconfig==2.0.0
+Jinja2==3.1.4
+jmespath==1.0.1
+MarkupSafe==2.1.5
+moto==5.0.12
+packaging==24.1
+pg8000==1.31.2
+pluggy==1.5.0
+pycparser==2.22
+pytest==8.3.2
+pytest-mock==3.14.0
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+PyYAML==6.0.2
+requests==2.32.3
+responses==0.25.3
+s3transfer==0.10.2
+scramp==1.4.5
+six==1.16.0
+urllib3==2.2.2
+Werkzeug==3.0.3
+xmltodict==0.13.0 \ No newline at end of file
diff --git a/src/extract_lambda.py b/src/extract_lambda.py
index 7d56c66..fb2d7e8 100644
--- a/src/extract_lambda.py
+++ b/src/extract_lambda.py
@@ -1,32 +1,156 @@
-from pg8000.native import Connection, Error, DatabaseError, InterfaceError
-from dotenv import load_dotenv
-import os
+from pg8000.native import Connection, DatabaseError, InterfaceError
+from dotenv import dotenv_values
+import boto3
+import csv
+from botocore.exceptions import ClientError
+import logging
+import json
+from datetime import datetime
+import re
-load_dotenv()
-def extract():
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
-# temporary credentials for dev- will not have access when uploaded
- database = os.getenv('database')
- user = os.getenv('user')
- password = os.getenv('password')
- host = os.getenv('host')
- port = os.getenv('port')
+class DBConnectionException(Exception):
+ """Wraps pg8000.native Error or DatabaseError."""
+ def __init__(self, e):
+ """Initialise with provided error message."""
+ self.message = str(e)
+ super().__init__(self.message)
+def lambda_handler(event, context):
+ """This lambda function connects to the Totesys database, lists the contents of the ingestion bucket,
+ and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them
+ it uses 3 helper functions to achieve these 3 functionalities
+ """
try:
- db = Connection.run(
- database=database,
- user=user,
- password=password,
- host=host,
- port=port
+ db = connect_to_database()
+ existing_files = list_existing_s3_files()
+ any_changes = process_and_upload_tables(db, existing_files)
+
+ if not any_changes:
+ logger.info("No changes detected in the database.")
+ return {
+ 'statusCode': 200,
+ 'body': json.dumps('No changes detected, no CSV files were uploaded.')
+ }
+ else:
+ return {
+ 'statusCode': 200,
+ 'body': json.dumps('CSV files processed and uploaded successfully.')
+ }
+
+ except Exception as e:
+ logger.error(f'Error: {e}')
+ return {
+ 'statusCode': 500,
+ 'body': json.dumps('Internal server error.')
+ }
+
+ finally:
+
+ if db:
+ db.close()
+
+def get_config(path: str = ".env") -> dict:
+ return dotenv_values(path)
+
+
+def connect_to_database() -> Connection:
+ try:
+ config = get_config()
+ host = config["host"]
+ port = config["port"]
+ user = config["user"]
+ password = config["password"]
+ database = config["database"]
+
+ return Connection(
+ database=database,
+ user=user,
+ password=password,
+ host=host,
+ port=port
)
- except DatabaseError as e:
- print(e)
except InterfaceError as i:
- print(i)
+ logger.error(f'Interface error: {i}')
+ raise DBConnectionException("Failed to connect to database")
+
+
+
+def list_existing_s3_files(bucket_name='extract_bucket', client=boto3.client('s3')):
+ """Creates a dictionary and populates it with the
+ results of listing the contents of the s3 bucket, then
+ returns the populated dictionary
+ """
+
+ existing_files = {}
+
+ try:
+ response = client.list_objects_v2(Bucket='extract_bucket')
+
+ if 'Contents' in response:
+ for obj in response['Contents']:
+ s3_key = obj['Key']
+ try:
+ file_obj = client.get_object(Bucket=bucket_name, Key=s3_key)
+ file_content = file_obj['Body'].read().decode('utf-8')
+ existing_files[s3_key] = file_content
+ except ClientError as e:
+ logger.error(f'Error retrieving S3 object {s3_key}: {e}')
+ else:
+ logger.error('The bucket is empty')
+
+ except ClientError as e:
+ logger.error(f'Error listing S3 objects: {e}')
+
+ return existing_files
+
+
+
+def process_and_upload_tables(db, existing_files, client=boto3.client('s3')):
+ """Creates a list of the tables from a database query and
+ then selects everything from each table in individual queries
+ it then writes each table to CSV files and compares with the item
+ in the existing_files dictionary with the same name. If it finds any changes
+ to files, or new tables/files it uploads them to the s3 bucket
+ """
+ ## NEW CODE
+ all_datetimes = []
+ for file_names in existing_files.keys():
+ datetime_str_on_s3 = ''.join(re.search(r'\/(.+/).+_(.+)\.csv',file_names).group(1,2))
+ all_datetimes.append(datetime.strptime(datetime_str_on_s3, '%Y/%m/%d/%H:%M:%S'))
+ latest_timestamp = max(all_datetimes)
+ ## END OF NEW CODE
+ tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';")
+ print(tables)
+ for table in tables:
+ table_name = table[0]
+ rows = db.run(f"SELECT * FROM {table_name};")
+
- \ No newline at end of file
+ csv_file_path = f"/tmp/{table_name}.csv"
+ with open(csv_file_path, "w", newline='') as file:
+ writer = csv.writer(file)
+ #column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")]
+ column_names = [col_name[0] for col_name in db.run(f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';")]
+ writer.writerow(column_names)
+ writer.writerows(rows)
+ s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv')
+ new_csv_content = open(csv_file_path, "r").read()
+ ## NEW CODE
+ latest_s3_object_key = datetime.strftime(latest_timestamp,f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv')
+ ## END OF NEW CODE
+ if existing_files[latest_s3_object_key] != new_csv_content:
+ try:
+ client.upload_file(csv_file_path, 'extract_bucket', s3_key)
+ logger.info(f"Uploaded {s3_key} to S3.")
+ except ClientError as e:
+ logger.error(f'Error uploading to S3: {e}')
+ else:
+ logger.info(f"No new data.")
+ \ No newline at end of file
diff --git a/src/secrets_manager.py b/src/secrets_manager.py
new file mode 100644
index 0000000..c0fb61e
--- /dev/null
+++ b/src/secrets_manager.py
@@ -0,0 +1,48 @@
+import boto3
+from botocore.exceptions import ClientError
+import json
+
+
+def sm_client():
+ sm_client = boto3.client('secretsmanager')
+ yield sm_client
+
+def create_secret(sm_client, secret_name, cohort_id, user, password, host, database, port):
+ secret = {
+ "cohort_id": cohort_id,
+ "user": user,
+ "password": password,
+ "host": host,
+ "database": database,
+ "port": port
+ }
+
+ response = sm_client.create_secret(
+ Name = secret_name,
+ SecretString = json.dumps(secret)
+ )
+
+ print(response)
+ return response
+
+def list_secret(sm_client):
+ response = sm_client.list_secrets()
+ secret_dict = response['SecretList']
+ secret_names = []
+ for items in secret_dict:
+ secret_names.append(items['Name'])
+ print(f'{len(secret_names)} secret(s) available')
+ for name in secret_names:
+ print(name)
+ return secret_names
+
+def retrieve_secrets(sm_client):
+ response = sm_client.get_secrets(
+
+ )
+
+
+
+#retrieve secret
+#so lambda can access totesy db
+#so lambda connect to the db and then retrieve the data \ No newline at end of file
diff --git a/terraform/rds.tf b/terraform/rds.tf
index 4b25c5f..88783b7 100644
--- a/terraform/rds.tf
+++ b/terraform/rds.tf
@@ -2,9 +2,9 @@ data "aws_availability_zones" "available" {}
module "vpc" {
source = "terraform-aws-modules/vpc/aws"
- version = "2.77.0"
+ version = "5.12.1"
- name = "${var.project_name}"
+ name = var.project_name
cidr = "10.0.0.0/16"
azs = data.aws_availability_zones.available.names
public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"]
@@ -13,7 +13,7 @@ module "vpc" {
}
resource "aws_db_subnet_group" "Terrific-Totes-sub-gr" {
- name = "TT-db-subnet"
+ name = "tt-db-subnet"
subnet_ids = module.vpc.public_subnets
tags = {
@@ -45,7 +45,7 @@ resource "aws_security_group" "rds" {
}
resource "aws_db_parameter_group" "Terrific-Totes-param-gr" {
- name = "TT-db-param"
+ name = "tt-db-param"
family = "postgres14"
parameter {
@@ -54,25 +54,27 @@ resource "aws_db_parameter_group" "Terrific-Totes-param-gr" {
}
}
-resource "aws_db_instance" "Terrific-Totes-rds" {
- db_name = "${var.project_name}"
- instance_class = "db.t3.micro"
- allocated_storage = 5
- engine = "postgres"
- engine_version = "14.1"
- username = "user credentials for the root user" # we could use .env here
- password = "user password for the root user" # we could use .env here
+resource "aws_db_instance" "terrific-totes-rds" {
+ db_name = var.project_name
+ instance_class = "db.t3.micro"
+ allocated_storage = 5
+ engine = "postgres"
+ engine_version = "14.10"
+ username = "totes"
+ password = "totes123"
+ # username = "user credentials for the root user" # we could use .env here
+ # password = "user password for the root user" # we could use .env here
### alternatively to providing username nad password we can specify:
-# resource "aws_kms_key" "example_key" {
-# description = "Example KMS Key"
-# }
-# within the resource:
-# manage_master_user_password = true
-# master_user_secret_kms_key_id = aws_kms_key.example.key_id
-# }
+ # resource "aws_kms_key" "example_key" {
+ # description = "Example KMS Key"
+ # }
+ # within the resource:
+ # manage_master_user_password = true
+ # master_user_secret_kms_key_id = aws_kms_key.example.key_id
+ # }
db_subnet_group_name = aws_db_subnet_group.Terrific-Totes-sub-gr.name
vpc_security_group_ids = [aws_security_group.rds.id]
parameter_group_name = aws_db_parameter_group.Terrific-Totes-param-gr.name
publicly_accessible = false
skip_final_snapshot = true
-} \ No newline at end of file
+}
diff --git a/terraform/vars.tf b/terraform/vars.tf
index d5cdafb..3c88731 100644
--- a/terraform/vars.tf
+++ b/terraform/vars.tf
@@ -29,8 +29,8 @@ variable "load_lambda_name" {
}
variable "project_name" {
- type = string
- default = "Terrific-Totes"
+ type = string
+ default = "tt"
}
data "aws_caller_identity" "current" {}
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test.py
diff --git a/test/test_secrets_manager.py b/test/test_secrets_manager.py
new file mode 100644
index 0000000..86533bc
--- /dev/null
+++ b/test/test_secrets_manager.py
@@ -0,0 +1,34 @@
+from src.secrets_manager import sm_client, create_secret, list_secret
+import boto3
+from moto import mock_aws
+import json
+import pytest
+import os
+
+pytest.fixture(scope='class')
+def mock_aws_credentials():
+ """Mocked AWS Credentials for moto."""
+ os.environ["AWS_ACCESS_KEY_ID"] = "testing"
+ os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
+ os.environ["AWS_SECURITY_TOKEN"] = "testing"
+ os.environ["AWS_SESSION_TOKEN"] = "testing"
+ os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
+
+@pytest.fixture(scope='class')
+def mock_sm_client(mock_aws_credentials):
+ with mock_aws():
+ yield boto3.client('secretsmanager')
+
+
+def test_create_secret_stores_secrets(mock_sm_client):
+ cohort_id = "test_cohort_id"
+ user = "test_user_id"
+ password = "test_password"
+ host = "test_host"
+ database = "test_database"
+ port = "test_port"
+
+ secret_name = "test_secret"
+ response = create_secret(mock_sm_client, secret_name, cohort_id, user, password, host, database, port)
+
+ assert response['Name'] == secret_name \ No newline at end of file
diff --git a/tests/dummy.txt b/tests/dummy.txt
new file mode 100644
index 0000000..af27ff4
--- /dev/null
+++ b/tests/dummy.txt
@@ -0,0 +1 @@
+This is a test file. \ No newline at end of file
diff --git a/tests/dummy_identical.csv b/tests/dummy_identical.csv
new file mode 100644
index 0000000..fdd8993
--- /dev/null
+++ b/tests/dummy_identical.csv
@@ -0,0 +1,4 @@
+Food_type,Flavour,Colour
+Vegetable,Sour,Green
+Berry,Sweet,Red
+
diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py
new file mode 100644
index 0000000..e94a8a4
--- /dev/null
+++ b/tests/test_extract_lambda.py
@@ -0,0 +1,109 @@
+import pytest
+import boto3
+from moto import mock_aws
+from unittest.mock import patch, MagicMock
+from unittest import TestCase
+from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables
+import os
+import logging
+
+@pytest.fixture(scope='class')
+def mock_config():
+ env_vars = {
+ "host": "abc",
+ "port": "5432",
+ "user": "def",
+ "password": "password",
+ "database": "db",
+ }
+ with patch("src.extract_lambda.get_config", return_value=env_vars) as mock_config:
+ yield mock_config
+
+
+@pytest.fixture(scope='class')
+def aws_credentials():
+ os.environ["AWS_ACCESS_KEY_ID"] = 'testing'
+ os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing'
+ os.environ["AWS_SECURIT_TOKEN"] = 'testing'
+ os.environ["AWS_SESSION_TOKEN"] = 'testing'
+ os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2'
+
+@pytest.fixture(scope='class')
+def s3_client(aws_credentials):
+ with mock_aws():
+ yield boto3.client('s3')
+
+class TestListExistingS3Files:
+ def test_error_if_no_bucket(self, s3_client, caplog):
+
+ logger = logging.getLogger()
+ logger.info('Testing now.')
+ caplog.set_level(logging.ERROR)
+ list_existing_s3_files(client=s3_client)
+ assert 'Error listing S3 objects' in caplog.text
+
+ def test_error_if_bucket_is_empty(self, s3_client, caplog):
+
+ s3_client.create_bucket(Bucket='extract_bucket',
+ CreateBucketConfiguration={
+ 'LocationConstraint': 'eu-west-2'
+ })
+ list_existing_s3_files(client=s3_client)
+ assert 'The bucket is empty' in caplog.text
+
+ def test_error_retrieving_object(self, s3_client, caplog):
+ s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt')
+ list_existing_s3_files(bucket_name='test_bucket', client=s3_client)
+
+ assert 'Error retrieving S3 object ' in caplog.text
+
+ def test_retrieves_file_content(self, s3_client, caplog):
+ result = list_existing_s3_files(client=s3_client)
+
+ assert list(result.values()) == ['This is a test file.']
+
+class TestConnectToDatabase:
+ def test_connect_to_database(mock_conn, mock_config):
+ with patch("src.extract_lambda.Connection", autospec=True) as mock_conn:
+ connect_to_database()
+ mock_conn.assert_called_with(
+ host="abc", user="def", port="5432", password="password", database="db"
+ )
+
+ def test_database_error(self, mock_config):
+ with pytest.raises(DBConnectionException):
+ connect_to_database()
+
+ def test_logs_interface_error(self, caplog):
+ logger = logging.getLogger()
+ logger.info('Testing now.')
+ caplog.set_level(logging.ERROR)
+ with pytest.raises(DBConnectionException):
+ connect_to_database()
+ assert 'Interface error' in caplog.text
+'''
+class TestProcessAndUploadTables:
+ def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog):
+ logger = logging.getLogger()
+ logger.info('Testing now.')
+ caplog.set_level(logging.ERROR)
+ ####
+ queries = ["SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';",
+ "SELECT * FROM Fruits;",
+ "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits'"]
+ return_values = [[['Fruits']],
+ [['Vegetable','Sour','Green'],['Berry','Sweet','Red']],
+ [['Food_type'],['Flavour'],['Colour']]]
+ vals = dict(zip(queries,return_values))
+
+ ####
+ with patch('src.extract_lambda.connect_to_database') as mock_db:
+ mock_db().run.side_effects = return_values
+ s3_key = 'Fruits/2024/08/15/Fruits_16:46:30.csv'
+ existing_files = {s3_key: 'Food_type,Flavour,Colour\nFruit,Sour,Green\nBerry,Sweet,Red'}
+ s3_client.create_bucket(Bucket='extract_bucket',
+ CreateBucketConfiguration={'LocationConstraint': 'eu-west-2'})
+ s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key)
+ process_and_upload_tables(mock_db(), existing_files, client=s3_client)
+ assert 'No new data.' in caplog.text
+''' \ No newline at end of file
git.ajschof.me — hosted by ajschofield — powered by cgit