From 68a0b4740e1aab2c507547ab985c7c1dc436d9c9 Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Tue, 13 Aug 2024 17:16:12 +0100 Subject: wip: running terraform apply to continue fixing terraform infrastructure --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .gitignore (limited to '.gitignore') diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5861f48 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.tfstate +*.tfstate.* +*.tfvars +*.tfvars.json +.terraform.tfstate.lock.info \ No newline at end of file -- cgit v1.2.3 From 4f0d6f287ae83d7cdc0df6988ab7b9de10912f16 Mon Sep 17 00:00:00 2001 From: T-Aji Date: Wed, 14 Aug 2024 12:25:57 +0100 Subject: feat/passing tests to helper function list_existing_s3_files --- .gitignore | 3 +++ src/extract_lambda.py | 12 ++++++----- tests/dummy.txt | 1 + tests/test_extract_lambda.py | 49 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 5 deletions(-) create mode 100644 .gitignore create mode 100644 tests/dummy.txt create mode 100644 tests/test_extract_lambda.py (limited to '.gitignore') diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..428f94e --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +venv +.env +__pycache__/ \ No newline at end of file diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 11ea5d1..dc70590 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -18,6 +18,7 @@ password = os.getenv('password') host = os.getenv('host') port = os.getenv('port') + def lambda_handler(event, context): """This lambda function connects to the Totesys database, lists the contents of the ingestion bucket, and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them @@ -69,27 +70,28 @@ def connect_to_database(): raise - -def list_existing_s3_files(): +def list_existing_s3_files(bucket_name='extract_bucket', client=boto3.client('s3')): """Creates a dictionary and populates it with the results of listing the contents of the s3 bucket, then returns the populated dictionary """ - client = boto3.client('s3') + existing_files = {} try: - response = client.list_objects_v2(Bucket=ingestion_bucket) + response = client.list_objects_v2(Bucket='extract_bucket') if 'Contents' in response: for obj in response['Contents']: s3_key = obj['Key'] try: - file_obj = client.get_object(Bucket=ingestion_bucket, Key=s3_key) + file_obj = client.get_object(Bucket=bucket_name, Key=s3_key) file_content = file_obj['Body'].read().decode('utf-8') existing_files[s3_key] = file_content except ClientError as e: logger.error(f'Error retrieving S3 object {s3_key}: {e}') + else: + logger.error('The bucket is empty') except ClientError as e: logger.error(f'Error listing S3 objects: {e}') diff --git a/tests/dummy.txt b/tests/dummy.txt new file mode 100644 index 0000000..af27ff4 --- /dev/null +++ b/tests/dummy.txt @@ -0,0 +1 @@ +This is a test file. \ No newline at end of file diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py new file mode 100644 index 0000000..472e93a --- /dev/null +++ b/tests/test_extract_lambda.py @@ -0,0 +1,49 @@ +import pytest +import boto3 +from moto import mock_aws +from src.extract_lambda import list_existing_s3_files #process_and_upload_tables +import os +import logging + + +@pytest.fixture(scope='class') +def aws_credentials(): + os.environ["AWS_ACCESS_KEY_ID"] = 'testing' + os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing' + os.environ["AWS_SECURIT_TOKEN"] = 'testing' + os.environ["AWS_SESSION_TOKEN"] = 'testing' + os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2' + +@pytest.fixture(scope='class') +def s3_client(aws_credentials): + with mock_aws(): + yield boto3.client('s3') + +class TestListExistings3Files(): + def test_error_if_no_bucket(self, s3_client, caplog): + + logger = logging.getLogger() + logger.info('Testing now.') + caplog.set_level(logging.ERROR) + list_existing_s3_files(client=s3_client) + assert 'Error listing S3 objects' in caplog.text + + def test_error_if_bucket_is_empty(self, s3_client, caplog): + + s3_client.create_bucket(Bucket='extract_bucket', + CreateBucketConfiguration={ + 'LocationConstraint': 'eu-west-2' + }) + list_existing_s3_files(client=s3_client) + assert 'The bucket is empty' in caplog.text + + def test_error_retrieving_object(self, s3_client, caplog): + s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt') + list_existing_s3_files(bucket_name='test_bucket', client=s3_client) + + assert 'Error retrieving S3 object ' in caplog.text + + def test_retrieves_file_content(self, s3_client, caplog): + result = list_existing_s3_files(client=s3_client) + + assert list(result.values()) == ['This is a test file.'] \ No newline at end of file -- cgit v1.2.3 From 46673b671bef834dc2e043e7845e8a5b8fee9d34 Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Wed, 14 Aug 2024 12:38:08 +0100 Subject: update gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to '.gitignore') diff --git a/.gitignore b/.gitignore index 5861f48..239c7e0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ *.tfstate.* *.tfvars *.tfvars.json -.terraform.tfstate.lock.info \ No newline at end of file +.terraform.tfstate.lock.info +*.zip \ No newline at end of file -- cgit v1.2.3 From 101e1e24cb38b6a45661b723881e2b2d6dd2fb07 Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Wed, 14 Aug 2024 14:35:05 +0100 Subject: wip: terraform debugging --- .gitignore | 5 ++++- src/load_lambda.py | 2 ++ src/transform_lambda.py | 2 ++ terraform/events.tf | 18 ++++++++++-------- terraform/s3.tf | 34 +++++++++++++++++----------------- 5 files changed, 35 insertions(+), 26 deletions(-) (limited to '.gitignore') diff --git a/.gitignore b/.gitignore index 239c7e0..d759665 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,7 @@ *.tfvars *.tfvars.json .terraform.tfstate.lock.info -*.zip \ No newline at end of file +*.zip +.terraform/ +.terraform* +log* \ No newline at end of file diff --git a/src/load_lambda.py b/src/load_lambda.py index e69de29..6ee681f 100644 --- a/src/load_lambda.py +++ b/src/load_lambda.py @@ -0,0 +1,2 @@ +def lambda_handler(): + pass \ No newline at end of file diff --git a/src/transform_lambda.py b/src/transform_lambda.py index e69de29..6ee681f 100644 --- a/src/transform_lambda.py +++ b/src/transform_lambda.py @@ -0,0 +1,2 @@ +def lambda_handler(): + pass \ No newline at end of file diff --git a/terraform/events.tf b/terraform/events.tf index 6744085..9fd89e4 100644 --- a/terraform/events.tf +++ b/terraform/events.tf @@ -1,7 +1,17 @@ +resource "aws_cloudwatch_event_target" "extract_lambda_cw_event" { + rule = aws_cloudwatch_event_rule.lambda_trigger.name + target_id = "TargetFunctionV1" + arn = aws_lambda_function.extract_lambda.arn #replaced lambda name placeholder + force_destroy = true +} + resource "aws_cloudwatch_event_rule" "lambda_trigger" { name = "lambda-scheduled-trigger" description = "Schedule to trigger the Lambda function" schedule_expression = "rate(30 minutes)" + force_destroy = true + # depends_on = [ + # aws_cloudwatch_event_target.extract_lambda_cw_event] # event_pattern = jsonencode({ # detail-type = @@ -10,14 +20,6 @@ resource "aws_cloudwatch_event_rule" "lambda_trigger" { # }) } - -resource "aws_cloudwatch_event_target" "extract_lambda_cw_event" { - rule = aws_cloudwatch_event_rule.lambda_trigger.name - target_id = "TargetFunctionV1" - arn = aws_lambda_function.extract_lambda.arn #replaced lambda name placeholder -} - - resource "aws_lambda_permission" "allow_eventbridge" { statement_id = "AllowExecutionFromEventBridge" action = "lambda:InvokeFunction" diff --git a/terraform/s3.tf b/terraform/s3.tf index 8ab5622..4c06b8e 100644 --- a/terraform/s3.tf +++ b/terraform/s3.tf @@ -32,20 +32,20 @@ resource "aws_s3_bucket" "lambda_code_bucket" { bucket_prefix = "${var.s3_code_bucket_name}-" } -resource "aws_s3_object" "extract_lambda_code" { - bucket = aws_s3_bucket.lambda_code_bucket.bucket - key = "${var.extract_lambda_name}/extract_function.zip" - source = "${path.module}/../extract_function.zip" -} # << can't figure out how this is being used but we seem to need it - -resource "aws_s3_object" "transform_lambda_code" { - bucket = aws_s3_bucket.lambda_code_bucket.bucket - key = "${var.transform_lambda_name}/transform_function.zip" - source = "${path.module}/../transform_function.zip" -} # << can't figure out how this is being used but we seem to need it - -resource "aws_s3_object" "load_lambda_code" { - bucket = aws_s3_bucket.lambda_code_bucket.bucket - key = "${var.load_lambda_name}/load_function.zip" - source = "${path.module}/../load_function.zip" -} \ No newline at end of file +# resource "aws_s3_object" "extract_lambda_code" { +# bucket = aws_s3_bucket.lambda_code_bucket.bucket +# key = "${var.extract_lambda_name}/extract_function.zip" +# source = "${path.module}/../extract_function.zip" +# } # << can't figure out how this is being used but we seem to need it + +# resource "aws_s3_object" "transform_lambda_code" { +# bucket = aws_s3_bucket.lambda_code_bucket.bucket +# key = "${var.transform_lambda_name}/transform_function.zip" +# source = "${path.module}/../transform_function.zip" +# } # << can't figure out how this is being used but we seem to need it + +# resource "aws_s3_object" "load_lambda_code" { +# bucket = aws_s3_bucket.lambda_code_bucket.bucket +# key = "${var.load_lambda_name}/load_function.zip" +# source = "${path.module}/../load_function.zip" +# } \ No newline at end of file -- cgit v1.2.3 From 5cdcbd64e9f4dba5f3ed8e8eb9f6e91e1adde0ba Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 14 Aug 2024 22:51:32 +0100 Subject: chore(gitignore): ignore .DS_Store --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to '.gitignore') diff --git a/.gitignore b/.gitignore index d759665..d1df545 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ *.zip .terraform/ .terraform* -log* \ No newline at end of file +log* +.DS_Store -- cgit v1.2.3 From fe548561acc5e133e3bee4026aab85db2e511bcd Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Thu, 15 Aug 2024 13:51:53 +0100 Subject: wip: secrets manager pushing to merge with extract_lambda --- .gitignore | 1 + src/extract_lambda.py | 1 + src/secrets_manager.py | 48 ++++++++++++++++++++++++++++++++++++++++++++ test/test_secrets_manager.py | 34 +++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+) create mode 100644 src/secrets_manager.py create mode 100644 test/test_secrets_manager.py (limited to '.gitignore') diff --git a/.gitignore b/.gitignore index d1df545..d164c3f 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ .terraform* log* .DS_Store +venv \ No newline at end of file diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 7d56c66..faa1d30 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -2,6 +2,7 @@ from pg8000.native import Connection, Error, DatabaseError, InterfaceError from dotenv import load_dotenv import os + load_dotenv() def extract(): diff --git a/src/secrets_manager.py b/src/secrets_manager.py new file mode 100644 index 0000000..c0fb61e --- /dev/null +++ b/src/secrets_manager.py @@ -0,0 +1,48 @@ +import boto3 +from botocore.exceptions import ClientError +import json + + +def sm_client(): + sm_client = boto3.client('secretsmanager') + yield sm_client + +def create_secret(sm_client, secret_name, cohort_id, user, password, host, database, port): + secret = { + "cohort_id": cohort_id, + "user": user, + "password": password, + "host": host, + "database": database, + "port": port + } + + response = sm_client.create_secret( + Name = secret_name, + SecretString = json.dumps(secret) + ) + + print(response) + return response + +def list_secret(sm_client): + response = sm_client.list_secrets() + secret_dict = response['SecretList'] + secret_names = [] + for items in secret_dict: + secret_names.append(items['Name']) + print(f'{len(secret_names)} secret(s) available') + for name in secret_names: + print(name) + return secret_names + +def retrieve_secrets(sm_client): + response = sm_client.get_secrets( + + ) + + + +#retrieve secret +#so lambda can access totesy db +#so lambda connect to the db and then retrieve the data \ No newline at end of file diff --git a/test/test_secrets_manager.py b/test/test_secrets_manager.py new file mode 100644 index 0000000..86533bc --- /dev/null +++ b/test/test_secrets_manager.py @@ -0,0 +1,34 @@ +from src.secrets_manager import sm_client, create_secret, list_secret +import boto3 +from moto import mock_aws +import json +import pytest +import os + +pytest.fixture(scope='class') +def mock_aws_credentials(): + """Mocked AWS Credentials for moto.""" + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURITY_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" + +@pytest.fixture(scope='class') +def mock_sm_client(mock_aws_credentials): + with mock_aws(): + yield boto3.client('secretsmanager') + + +def test_create_secret_stores_secrets(mock_sm_client): + cohort_id = "test_cohort_id" + user = "test_user_id" + password = "test_password" + host = "test_host" + database = "test_database" + port = "test_port" + + secret_name = "test_secret" + response = create_secret(mock_sm_client, secret_name, cohort_id, user, password, host, database, port) + + assert response['Name'] == secret_name \ No newline at end of file -- cgit v1.2.3 From dd68d948dec97fedfcaa89806523975ad1224c71 Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Fri, 16 Aug 2024 13:48:22 +0100 Subject: refactoring for extract lambda to filter by last updated and if not empty write it s3 --- .gitignore | 2 ++ src/extract_lambda.py | 26 +++++++++++--------------- 2 files changed, 13 insertions(+), 15 deletions(-) (limited to '.gitignore') diff --git a/.gitignore b/.gitignore index ca15434..bceab93 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ __pycache__/ # OS-Related Files .DS_Store + +*venv* diff --git a/src/extract_lambda.py b/src/extract_lambda.py index f4c0c1d..e348bef 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -136,9 +136,9 @@ def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): print(tables) for table in tables: table_name = table[0] - rows = db.run(f"SELECT * FROM {table_name};") - + rows = db.run(f"SELECT * FROM {table_name} WHERE last_updated >= {datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')};") + if rows: csv_file_path = f"/tmp/{table_name}.csv" with open(csv_file_path, "w", newline='') as file: writer = csv.writer(file) @@ -147,16 +147,12 @@ def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): writer.writerow(column_names) writer.writerows(rows) s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') - new_csv_content = open(csv_file_path, "r").read() - ## NEW CODE - latest_s3_object_key = datetime.strftime(latest_timestamp,f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') - ## END OF NEW CODE - if existing_files[latest_s3_object_key] != new_csv_content: - try: - client.upload_file(csv_file_path, extract_bucket(), s3_key) - logger.info(f"Uploaded {s3_key} to S3.") - except ClientError as e: - logger.error(f'Error uploading to S3: {e}') - else: - logger.info(f"No new data.") - \ No newline at end of file + + try: + client.upload_file(csv_file_path, extract_bucket(), s3_key) + logger.info(f"Uploaded {s3_key} to S3.") + except ClientError as e: + logger.error(f'Error uploading to S3: {e}') + else: + logger.info(f"No new data.") + \ No newline at end of file -- cgit v1.2.3 From 24a4573d6cf64ec0383ae16bfba09a0ffdb8c129 Mon Sep 17 00:00:00 2001 From: T-Aji Date: Mon, 19 Aug 2024 11:49:08 +0100 Subject: update .gitignore --- .gitignore | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to '.gitignore') diff --git a/.gitignore b/.gitignore index bceab93..6aa03fc 100644 --- a/.gitignore +++ b/.gitignore @@ -14,5 +14,4 @@ __pycache__/ # OS-Related Files .DS_Store - -*venv* +venv \ No newline at end of file -- cgit v1.2.3