15 files changed, 483 insertions, 130 deletions
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
new file mode 100644
index 0000000..372d0b3
--- /dev/null
+++ b/.github/workflows/deploy.yml
@@ -0,0 +1,37 @@
+name: deploy-terraform
+
+on:
+  push:
+    branches:
+      - test-ci/**  # Adjust the branch based on our deployment strategy
+
+jobs:
+  deploy-terraform:
+    name: Deploy Terraform
+    runs-on: ubuntu-latest
+    environment: test-env
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+
+      - name: Install Terraform
+        uses: hashicorp/setup-terraform@v3
+
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ secrets.AWS_REGION }}
+
+      - name: Terraform Init
+        working-directory: terraform
+        run: terraform init
+
+      - name: Terraform Plan
+        working-directory: terraform
+        run: terraform plan
+
+      - name: Terraform Apply
+        working-directory: terraform
+        run: terraform apply --auto-approve
+\ No newline at end of file
diff --git a/.github/workflows/on-commit.yml b/.github/workflows/on-commit.yml
new file mode 100644
index 0000000..fd9ffb8
--- /dev/null
+++ b/.github/workflows/on-commit.yml
@@ -0,0 +1,50 @@
+name: commit-qc-checks
+
+on:
+    push:
+        branches-ignore:
+            - 'main'
+
+jobs:
+    python-quality-checks:
+        runs-on: ubuntu-latest
+        steps:
+            - uses : actions/checkout@v4
+            - name : 'Python: Setup'
+              uses : actions/setup-python@v5
+              with:
+                python-version: 3.11
+            - name : 'Python: Install Dependencies'
+              run: |
+                python -m pip install --upgrade pip
+                pip install flake8 pylint black bandit safety
+              continue-on-error: true
+            - name : 'Python: Linting'
+              run: |
+                flake8 .
+                find . -name "*.py" | xargs pylint
+              continue-on-error: true
+            - name : 'Python: Formatting'
+              run: |
+                black --check .
+              continue-on-error: true
+    terraform-quality-checks:
+        runs-on: ubuntu-latest
+        steps:
+            - uses : actions/checkout@v4
+            - name: 'Terraform: Setup'
+              uses: hashicorp/setup-terraform@v3
+              with:
+                terraform_version: latest
+            - name: 'Terraform: Formatting'
+              working-directory: terraform
+              run: terraform fmt -check -recursive
+              continue-on-error: true
+            - name: 'Terraform: Initialise'
+              working-directory: terraform
+              run: terraform init -backend=false
+              continue-on-error: true
+            - name: 'Terraform: Validate'
+              working-directory: terraform
+              run: terraform validate
+              continue-on-error: true
+\ No newline at end of file
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
deleted file mode 100644
index 7d5b5b1..0000000
--- a/.github/workflows/python.yml
+++ /dev/null
@@ -1,50 +0,0 @@
-name: python-quality-checks
-
-on:
-    push:
-        branches: [development]
-    pull_request:
-        branches: [development, staging]
-
-jobs:
-
-    check-if-py-files-exist:
-        runs-on: ubuntu-latest
-        outputs:
-            py_files_exist: ${{ steps.check.outputs.py_files_exist }}
-        steps:
-            - uses: actions/checkout@v2
-            - id: check_files
-              run: |
-                if [ -n "$(find . -name '*.py')" ]; then
-                    echo "::set-output name=py_files_exist::true"
-                else
-                    echo "::set-output name=py_files_exist::false"
-                fi
-
-    quality-checks:
-        needs: check-if-py-files-exist
-        if: ${{ needs.check-if-py-files-exist.outputs.py_files_exist == 'true' }}
-        runs-on: ubuntu-latest
-        steps:
-            - uses : actions/checkout@v2
-            - name : Setup
-              uses : actions/setup-python@v2
-              with:
-                python-version: 3.11
-            - name : Dependencies
-              run: |
-                python -m pip install --upgrade pip
-                pip install flake8 pylint black bandit safety
-            - name : Linting
-              run: |
-                flake8 .
-                find . -name "*.py" | xargs pylint
-            - name : Formatting
-              run: |
-                black --check .
-            - name: Security
-              run: |
-                bandit -r .
-                safety check
-                
-\ No newline at end of file
diff --git a/.github/workflows/terraform.yml b/.github/workflows/terraform.yml
deleted file mode 100644
index c349756..0000000
--- a/.github/workflows/terraform.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-name: terraform-quality-checks
-
-on:
-    push:
-        branches: [development]
-        paths:
-            - 'terraform/**.tf'
-            - 'terraform/**.tfvars'
-    pull_request:
-        branches: [development, staging]
-        paths:
-            - 'terraform/**.tf'
-            - 'terraform/**.tfvars'
-jobs:
-    terraform-validation:
-        runs-on: ubuntu-latest
-        defaults:
-            run:
-                working-directory: ./terraform
-        steps:
-            - uses: actions/checkout@v2
-            - name: Setup Terraform
-              uses: hashicorp/setup-terraform@v1
-              with:
-                  terraform_version: latest # Using the latest version, but not sure if it's the best practice
-            - name: Format
-              run: terraform fmt -check -recursive
-            - name: Init
-              run: terraform init -backend=false
-            - name: Validate
-              run: terraform validate
-            - name: Setup TFLint
-              uses: terraform-linters/setup-tflint@v2
-              with:
-                  tflint_version: latest
-            - name: Run TFLint
-              run: tflint -f compact
-\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index cd44594..ca15434 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,7 @@
 # Output Files
 *.zip
 log*
+__pycache__/
 
 # OS-Related Files
 .DS_Store
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..6f383f9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,30 @@
+asn1crypto==1.5.1
+boto3==1.34.159
+botocore==1.34.159
+certifi==2024.7.4
+cffi==1.17.0
+charset-normalizer==3.3.2
+cryptography==43.0.0
+idna==3.7
+iniconfig==2.0.0
+Jinja2==3.1.4
+jmespath==1.0.1
+MarkupSafe==2.1.5
+moto==5.0.12
+packaging==24.1
+pg8000==1.31.2
+pluggy==1.5.0
+pycparser==2.22
+pytest==8.3.2
+pytest-mock==3.14.0
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+PyYAML==6.0.2
+requests==2.32.3
+responses==0.25.3
+s3transfer==0.10.2
+scramp==1.4.5
+six==1.16.0
+urllib3==2.2.2
+Werkzeug==3.0.3
+xmltodict==0.13.0
+\ No newline at end of file
diff --git a/src/extract_lambda.py b/src/extract_lambda.py
index 7d56c66..fb2d7e8 100644
--- a/src/extract_lambda.py
+++ b/src/extract_lambda.py
@@ -1,32 +1,156 @@
-from pg8000.native import Connection, Error, DatabaseError, InterfaceError
-from dotenv import load_dotenv
-import os
+from pg8000.native import Connection, DatabaseError, InterfaceError
+from dotenv import dotenv_values
+import boto3
+import csv
+from botocore.exceptions import ClientError
+import logging
+import json
+from datetime import datetime
+import re
 
-load_dotenv()
 
-def extract():
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
 
-# temporary credentials for dev- will not have access when uploaded
 
-    database = os.getenv('database')
-    user = os.getenv('user')
-    password = os.getenv('password')
-    host = os.getenv('host')
-    port = os.getenv('port')
+class DBConnectionException(Exception):
+    """Wraps pg8000.native Error or DatabaseError."""
 
+    def __init__(self, e):
+        """Initialise with provided error message."""
+        self.message = str(e)
+        super().__init__(self.message)
 
+def lambda_handler(event, context):
+    """This lambda function connects to the Totesys database, lists the contents of the ingestion bucket,
+       and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them
+       it uses 3 helper functions to achieve these 3 functionalities 
+    """
     try:
-        db = Connection.run(
-        database=database,
-        user=user,
-        password=password,
-        host=host,
-        port=port
+        db = connect_to_database()
+        existing_files = list_existing_s3_files()
+        any_changes = process_and_upload_tables(db, existing_files)
+        
+        if not any_changes:
+            logger.info("No changes detected in the database.")
+            return {
+                'statusCode': 200,
+                'body': json.dumps('No changes detected, no CSV files were uploaded.')
+            }
+        else:
+            return {
+                'statusCode': 200,
+                'body': json.dumps('CSV files processed and uploaded successfully.')
+            }
+        
+    except Exception as e:
+        logger.error(f'Error: {e}')
+        return {
+            'statusCode': 500,
+            'body': json.dumps('Internal server error.')
+        }
+    
+    finally:
+ 
+        if db:
+            db.close()
+
+def get_config(path: str = ".env") -> dict:
+    return dotenv_values(path)
+
+
+def connect_to_database() -> Connection:
+    try:
+        config = get_config()
+        host = config["host"]
+        port = config["port"]
+        user = config["user"]
+        password = config["password"]
+        database = config["database"]
+
+        return Connection(
+            database=database,
+            user=user,
+            password=password,
+            host=host,
+            port=port
         )
-    except DatabaseError as e:
-        print(e)
     except InterfaceError as i:
-        print(i)
+        logger.error(f'Interface error: {i}')
+        raise DBConnectionException("Failed to connect to database")
+
+
+
+def list_existing_s3_files(bucket_name='extract_bucket', client=boto3.client('s3')):
+    """Creates a dictionary and populates it with the 
+       results of listing the contents of the s3 bucket, then
+       returns the populated dictionary
+    """
+    
+    existing_files = {}
+    
+    try:
+        response = client.list_objects_v2(Bucket='extract_bucket')
+        
+        if 'Contents' in response:
+            for obj in response['Contents']:
+                s3_key = obj['Key']
+                try:
+                    file_obj = client.get_object(Bucket=bucket_name, Key=s3_key)
+                    file_content = file_obj['Body'].read().decode('utf-8')
+                    existing_files[s3_key] = file_content
+                except ClientError as e:
+                    logger.error(f'Error retrieving S3 object {s3_key}: {e}')
+        else:
+            logger.error('The bucket is empty')
+    
+    except ClientError as e:
+        logger.error(f'Error listing S3 objects: {e}')
+    
+    return existing_files
+
+
+
+def process_and_upload_tables(db, existing_files, client=boto3.client('s3')):
+    """Creates a list of the tables from a database query and 
+       then selects everything from each table in individual queries
+       it then writes each table to CSV files and compares with the item 
+       in the existing_files dictionary with the same name. If it finds any changes
+       to files, or new tables/files it uploads them to the s3 bucket
+    """
+    ## NEW CODE
+    all_datetimes = []
+    for file_names in existing_files.keys():
+        datetime_str_on_s3 = ''.join(re.search(r'\/(.+/).+_(.+)\.csv',file_names).group(1,2))
+        all_datetimes.append(datetime.strptime(datetime_str_on_s3, '%Y/%m/%d/%H:%M:%S'))
+    latest_timestamp = max(all_datetimes)
+    ## END OF NEW CODE
 
+    tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';")
+    print(tables)
+    for table in tables:
+        table_name = table[0]
+        rows = db.run(f"SELECT * FROM {table_name};")
+        
 
-    
-\ No newline at end of file
+        csv_file_path = f"/tmp/{table_name}.csv"
+        with open(csv_file_path, "w", newline='') as file:
+            writer = csv.writer(file)
+            #column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")]
+            column_names = [col_name[0] for col_name in db.run(f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';")]
+            writer.writerow(column_names)
+            writer.writerows(rows)
+        s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv')
+        new_csv_content = open(csv_file_path, "r").read()
+        ## NEW CODE
+        latest_s3_object_key = datetime.strftime(latest_timestamp,f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv')
+        ## END OF NEW CODE
+        if existing_files[latest_s3_object_key] != new_csv_content: 
+            try:
+                client.upload_file(csv_file_path, 'extract_bucket', s3_key)
+                logger.info(f"Uploaded {s3_key} to S3.")
+            except ClientError as e:
+                logger.error(f'Error uploading to S3: {e}')
+        else:
+            logger.info(f"No new data.")
+        
+\ No newline at end of file
diff --git a/src/secrets_manager.py b/src/secrets_manager.py
new file mode 100644
index 0000000..c0fb61e
--- /dev/null
+++ b/src/secrets_manager.py
@@ -0,0 +1,48 @@
+import boto3
+from botocore.exceptions import ClientError
+import json
+
+
+def sm_client():
+    sm_client = boto3.client('secretsmanager')
+    yield sm_client
+
+def create_secret(sm_client, secret_name, cohort_id, user, password, host, database, port):
+    secret = {
+        "cohort_id": cohort_id,
+        "user": user,
+        "password": password,
+        "host": host,
+        "database": database,
+        "port": port
+    }
+
+    response = sm_client.create_secret(
+        Name = secret_name,
+        SecretString = json.dumps(secret)
+    )
+
+    print(response)
+    return response
+
+def list_secret(sm_client):
+    response = sm_client.list_secrets()
+    secret_dict = response['SecretList']
+    secret_names = []
+    for items in secret_dict:
+        secret_names.append(items['Name'])
+    print(f'{len(secret_names)} secret(s) available')
+    for name in secret_names:
+        print(name)
+    return secret_names
+
+def retrieve_secrets(sm_client):
+    response = sm_client.get_secrets(
+        
+    )
+
+
+
+#retrieve secret
+#so lambda can access totesy db
+#so lambda connect to the db and then retrieve the data
+\ No newline at end of file
diff --git a/terraform/rds.tf b/terraform/rds.tf
index 4b25c5f..88783b7 100644
--- a/terraform/rds.tf
+++ b/terraform/rds.tf
@@ -2,9 +2,9 @@ data "aws_availability_zones" "available" {}
 
 module "vpc" {
   source  = "terraform-aws-modules/vpc/aws"
-  version = "2.77.0"
+  version = "5.12.1"
 
-  name                 = "${var.project_name}"
+  name                 = var.project_name
   cidr                 = "10.0.0.0/16"
   azs                  = data.aws_availability_zones.available.names
   public_subnets       = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"]
@@ -13,7 +13,7 @@ module "vpc" {
 }
 
 resource "aws_db_subnet_group" "Terrific-Totes-sub-gr" {
-  name       = "TT-db-subnet"
+  name       = "tt-db-subnet"
   subnet_ids = module.vpc.public_subnets
 
   tags = {
@@ -45,7 +45,7 @@ resource "aws_security_group" "rds" {
 }
 
 resource "aws_db_parameter_group" "Terrific-Totes-param-gr" {
-  name   = "TT-db-param"
+  name   = "tt-db-param"
   family = "postgres14"
 
   parameter {
@@ -54,25 +54,27 @@ resource "aws_db_parameter_group" "Terrific-Totes-param-gr" {
   }
 }
 
-resource "aws_db_instance" "Terrific-Totes-rds" {
-  db_name                = "${var.project_name}"
-  instance_class         = "db.t3.micro"
-  allocated_storage      = 5
-  engine                 = "postgres"
-  engine_version         = "14.1"
-  username               = "user credentials for the root user" # we could use .env here
-  password               = "user password for the root user" # we could use .env here
+resource "aws_db_instance" "terrific-totes-rds" {
+  db_name           = var.project_name
+  instance_class    = "db.t3.micro"
+  allocated_storage = 5
+  engine            = "postgres"
+  engine_version    = "14.10"
+  username          = "totes"
+  password          = "totes123"
+  # username          = "user credentials for the root user" # we could use .env here
+  # password          = "user password for the root user"    # we could use .env here
   ### alternatively to providing username nad password we can specify:
-# resource "aws_kms_key" "example_key" {
-#       description = "Example KMS Key"
-# }
-# within the resource:
-#   manage_master_user_password   = true
-#   master_user_secret_kms_key_id = aws_kms_key.example.key_id
-# }
+  # resource "aws_kms_key" "example_key" {
+  #       description = "Example KMS Key"
+  # }
+  # within the resource:
+  #   manage_master_user_password   = true
+  #   master_user_secret_kms_key_id = aws_kms_key.example.key_id
+  # }
   db_subnet_group_name   = aws_db_subnet_group.Terrific-Totes-sub-gr.name
   vpc_security_group_ids = [aws_security_group.rds.id]
   parameter_group_name   = aws_db_parameter_group.Terrific-Totes-param-gr.name
   publicly_accessible    = false
   skip_final_snapshot    = true
-}
-\ No newline at end of file
+}
diff --git a/terraform/vars.tf b/terraform/vars.tf
index d5cdafb..3c88731 100644
--- a/terraform/vars.tf
+++ b/terraform/vars.tf
@@ -29,8 +29,8 @@ variable "load_lambda_name" {
 }
 
 variable "project_name" {
-    type = string
-    default = "Terrific-Totes"
+  type    = string
+  default = "tt"
 }
 
 data "aws_caller_identity" "current" {}
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test.py
diff --git a/test/test_secrets_manager.py b/test/test_secrets_manager.py
new file mode 100644
index 0000000..86533bc
--- /dev/null
+++ b/test/test_secrets_manager.py
@@ -0,0 +1,34 @@
+from src.secrets_manager import sm_client, create_secret, list_secret
+import boto3
+from moto import mock_aws
+import json
+import pytest 
+import os
+
+pytest.fixture(scope='class')
+def mock_aws_credentials():
+    """Mocked AWS Credentials for moto."""
+    os.environ["AWS_ACCESS_KEY_ID"] = "testing"
+    os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
+    os.environ["AWS_SECURITY_TOKEN"] = "testing"
+    os.environ["AWS_SESSION_TOKEN"] = "testing"
+    os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
+
+@pytest.fixture(scope='class')
+def mock_sm_client(mock_aws_credentials):
+    with mock_aws():
+        yield boto3.client('secretsmanager')
+
+
+def test_create_secret_stores_secrets(mock_sm_client):
+    cohort_id = "test_cohort_id"
+    user = "test_user_id"
+    password = "test_password"
+    host = "test_host"
+    database = "test_database"
+    port = "test_port"
+
+    secret_name = "test_secret"
+    response = create_secret(mock_sm_client, secret_name, cohort_id, user, password, host, database, port)
+    
+    assert response['Name'] == secret_name
+\ No newline at end of file
diff --git a/tests/dummy.txt b/tests/dummy.txt
new file mode 100644
index 0000000..af27ff4
--- /dev/null
+++ b/tests/dummy.txt
@@ -0,0 +1 @@
+This is a test file.
+\ No newline at end of file
diff --git a/tests/dummy_identical.csv b/tests/dummy_identical.csv
new file mode 100644
index 0000000..fdd8993
--- /dev/null
+++ b/tests/dummy_identical.csv
@@ -0,0 +1,4 @@
+Food_type,Flavour,Colour
+Vegetable,Sour,Green
+Berry,Sweet,Red
+
diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py
new file mode 100644
index 0000000..e94a8a4
--- /dev/null
+++ b/tests/test_extract_lambda.py
@@ -0,0 +1,109 @@
+import pytest
+import boto3
+from moto import mock_aws
+from unittest.mock import patch, MagicMock
+from unittest import TestCase
+from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables
+import os 
+import logging
+
+@pytest.fixture(scope='class')
+def mock_config():
+    env_vars = {
+        "host": "abc",
+        "port": "5432",
+        "user": "def",
+        "password": "password",
+        "database": "db",
+    }
+    with patch("src.extract_lambda.get_config", return_value=env_vars) as mock_config:
+        yield mock_config
+
+
+@pytest.fixture(scope='class')
+def aws_credentials():
+    os.environ["AWS_ACCESS_KEY_ID"] = 'testing'
+    os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing'
+    os.environ["AWS_SECURIT_TOKEN"] = 'testing'
+    os.environ["AWS_SESSION_TOKEN"] = 'testing'
+    os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2'
+
+@pytest.fixture(scope='class')
+def s3_client(aws_credentials):
+    with mock_aws():
+        yield boto3.client('s3')
+
+class TestListExistingS3Files:
+    def test_error_if_no_bucket(self, s3_client, caplog):
+
+        logger = logging.getLogger()
+        logger.info('Testing now.')
+        caplog.set_level(logging.ERROR)
+        list_existing_s3_files(client=s3_client)
+        assert 'Error listing S3 objects' in caplog.text
+
+    def test_error_if_bucket_is_empty(self, s3_client, caplog):
+
+        s3_client.create_bucket(Bucket='extract_bucket', 
+                                CreateBucketConfiguration={
+                                    'LocationConstraint': 'eu-west-2'
+                                })
+        list_existing_s3_files(client=s3_client)
+        assert 'The bucket is empty' in caplog.text 
+
+    def test_error_retrieving_object(self, s3_client, caplog):
+        s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt')
+        list_existing_s3_files(bucket_name='test_bucket', client=s3_client)
+
+        assert 'Error retrieving S3 object ' in caplog.text
+
+    def test_retrieves_file_content(self, s3_client, caplog):
+        result = list_existing_s3_files(client=s3_client)
+
+        assert list(result.values()) == ['This is a test file.'] 
+
+class TestConnectToDatabase:
+    def test_connect_to_database(mock_conn, mock_config):
+        with patch("src.extract_lambda.Connection", autospec=True) as mock_conn:  
+            connect_to_database()
+            mock_conn.assert_called_with(
+            host="abc", user="def", port="5432", password="password", database="db"
+            )
+
+    def test_database_error(self, mock_config):
+        with pytest.raises(DBConnectionException):
+            connect_to_database()
+
+    def test_logs_interface_error(self, caplog):
+        logger = logging.getLogger()
+        logger.info('Testing now.')
+        caplog.set_level(logging.ERROR)
+        with pytest.raises(DBConnectionException):
+            connect_to_database()
+        assert 'Interface error' in caplog.text
+'''
+class TestProcessAndUploadTables:
+    def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog):
+        logger = logging.getLogger()
+        logger.info('Testing now.')
+        caplog.set_level(logging.ERROR)
+        ####
+        queries = ["SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';",
+                    "SELECT * FROM Fruits;",
+                    "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits'"]
+        return_values = [[['Fruits']],
+                        [['Vegetable','Sour','Green'],['Berry','Sweet','Red']],
+                        [['Food_type'],['Flavour'],['Colour']]]
+        vals = dict(zip(queries,return_values))
+
+        ####
+        with patch('src.extract_lambda.connect_to_database') as mock_db:
+            mock_db().run.side_effects = return_values
+            s3_key = 'Fruits/2024/08/15/Fruits_16:46:30.csv'
+            existing_files = {s3_key: 'Food_type,Flavour,Colour\nFruit,Sour,Green\nBerry,Sweet,Red'}
+            s3_client.create_bucket(Bucket='extract_bucket', 
+                        CreateBucketConfiguration={'LocationConstraint': 'eu-west-2'})
+            s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key)
+            process_and_upload_tables(mock_db(), existing_files, client=s3_client)
+            assert 'No new data.' in caplog.text
+'''
+\ No newline at end of file