From fe5c2ef279b3190b242df08de2b680b4de5cac4c Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Mon, 12 Aug 2024 16:51:56 +0100 Subject: Completed the main.tf and vars.tf file. Currently: figuring out the configurations for AWS IAM USERS --- terraform/main.tf | 26 ++++++++++++++++++++++++++ terraform/vars.tf | 23 +++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 terraform/main.tf create mode 100644 terraform/vars.tf diff --git a/terraform/main.tf b/terraform/main.tf new file mode 100644 index 0000000..3ca9a3d --- /dev/null +++ b/terraform/main.tf @@ -0,0 +1,26 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~>5.0" + } + } + backend "s3" { + bucket = "bentley-secrets" + key = "bentley-project/terraform.tfstate" + region = "eu-west-2" + } +} + +provider "aws" { + region = "eu-west-2" + default_tags { + tags = { + ProjectName = "Terrific-Totes" + Team = "Team-Bentley" + Environment = "Dev" + GitHubRepo = "de-project-bentley" + ManagedBy = "Terraform" + } + } +} \ No newline at end of file diff --git a/terraform/vars.tf b/terraform/vars.tf new file mode 100644 index 0000000..166f2c5 --- /dev/null +++ b/terraform/vars.tf @@ -0,0 +1,23 @@ +variable "s3_extract_bucket_name" { + type = string + default = "extract-bucket" +} + +variable "s3_transform_bucket_name" { + type = string + default = "transform-bucket" +} + +variable "extract_lambda_name" { + type = string + default = "extract-lambda" +} + +variable "transform_lambda_name" { + type = string + default = "transform-lambda" +} + +data "aws_caller_identity" "current" {} + +data "aws_region" "current" {} \ No newline at end of file -- cgit v1.2.3 From ef770c1ea4ee633489323a8ab321b1214b51a770 Mon Sep 17 00:00:00 2001 From: Ellie Date: Mon, 12 Aug 2024 16:57:11 +0100 Subject: chore: add aws_iam_role --- terraform/iam.tf | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 terraform/iam.tf diff --git a/terraform/iam.tf b/terraform/iam.tf new file mode 100644 index 0000000..7501373 --- /dev/null +++ b/terraform/iam.tf @@ -0,0 +1,29 @@ +# define + +resource "aws_iam_role" "bentley_service_role" { + assume_role_policy = < Date: Tue, 13 Aug 2024 09:47:21 +0100 Subject: [feat]/add eventbridge schedule and step function trigger 1 of 2 --- terraform/events.tf | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 terraform/events.tf diff --git a/terraform/events.tf b/terraform/events.tf new file mode 100644 index 0000000..25fb35b --- /dev/null +++ b/terraform/events.tf @@ -0,0 +1,52 @@ +resource "aws_cloudwatch_event_rule" "lambda_trigger" { + name = "lambda-scheduled-trigger" + description = "Schedule to trigger the Lambda function" + schedule_expression = "rate(30 minutes)" + +# event_pattern = jsonencode({ +# detail-type = [ +# "AWS Console Sign In via CloudTrail" +# ] +# }) +} + + +resource "aws_cloudwatch_event_target" "lambda" { + rule = aws_cloudwatch_event_rule.lambda_trigger.name + target_id = "TargetFunctionV1" + arn = aws_lambda_function.my_lambda_function.arn +} + + + +resource "aws_lambda_permission" "allow_eventbridge" { + statement_id = "AllowExecutionFromEventBridge" + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.my_lambda_function.function_name + principal = "events.amazonaws.com" + source_arn = aws_cloudwatch_event_rule.lambda_trigger.arn +} + + +# below is step function 1 +resource "aws_lambda_permission" "allow_s3_ingestion" { + statement_id = "AllowS3InvokeLambdaTransform" + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.lambda_transform.function_name + principal = "s3.amazonaws.com" + source_arn = aws_s3_bucket.extract.arn +} + + +resource "aws_s3_bucket_notification" "extract_bucket_notification" { + bucket = aws_s3_bucket.extract.id + + lambda_function { + events = ["s3:ObjectCreated:*"] + lambda_function_arn = aws_lambda_function.lambda_transform.arn + } + + depends_on = [aws_lambda_permission.allow_s3_ingestion] +} + +# need to duplicate and replace "2" with "3" \ No newline at end of file -- cgit v1.2.3 From e3e9817f4e88afc8eb89e0b18a7fe8b1f381e0d4 Mon Sep 17 00:00:00 2001 From: T-Aji Date: Tue, 13 Aug 2024 09:57:14 +0100 Subject: [feat]/add step function trigger 2 of 2 --- terraform/events.tf | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/terraform/events.tf b/terraform/events.tf index 25fb35b..4d68a23 100644 --- a/terraform/events.tf +++ b/terraform/events.tf @@ -49,4 +49,24 @@ resource "aws_s3_bucket_notification" "extract_bucket_notification" { depends_on = [aws_lambda_permission.allow_s3_ingestion] } -# need to duplicate and replace "2" with "3" \ No newline at end of file +###### + +resource "aws_lambda_permission" "allow_s3_transfrom_bucket" { + statement_id = "AllowS3InvokeLambdaTransform" + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.lambda_transform.function_name + principal = "s3.amazonaws.com" + source_arn = aws_s3_bucket.transform.arn +} + + +resource "aws_s3_bucket_notification" "transform_bucket_notification" { + bucket = aws_s3_bucket.transform.id + + lambda_function { + events = ["s3:ObjectCreated:*"] + lambda_function_arn = aws_lambda_function.lambda_transform.arn + } + + depends_on = [aws_lambda_permission.allow_s3_transform] +} \ No newline at end of file -- cgit v1.2.3 From c8d8d2cee4262782890ea68cda8fc86f61098b09 Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Tue, 13 Aug 2024 10:14:01 +0100 Subject: s3 buckets in tf and initial blank lambda py files set-up in src folder for the next task of lambda tf set up --- src/extract_lambda.py | 0 src/load_lambda.py | 0 src/transform_lambda.py | 0 terraform/s3.tf | 17 +++++++++++++++++ terraform/vars.tf | 5 +++++ 5 files changed, 22 insertions(+) create mode 100644 src/extract_lambda.py create mode 100644 src/load_lambda.py create mode 100644 src/transform_lambda.py create mode 100644 terraform/s3.tf diff --git a/src/extract_lambda.py b/src/extract_lambda.py new file mode 100644 index 0000000..e69de29 diff --git a/src/load_lambda.py b/src/load_lambda.py new file mode 100644 index 0000000..e69de29 diff --git a/src/transform_lambda.py b/src/transform_lambda.py new file mode 100644 index 0000000..e69de29 diff --git a/terraform/s3.tf b/terraform/s3.tf new file mode 100644 index 0000000..bfe891e --- /dev/null +++ b/terraform/s3.tf @@ -0,0 +1,17 @@ +resource "aws_s3_bucket" "extract_bucket" { + bucket = "${var.s3_extract_bucket_name}" +} + +resource "aws_s3_bucket" "transform_bucket" { + bucket = "${var.s3_transform_bucket_name}" +} + +resource "aws_s3_bucket" "lambda_bucket" { + bucket = "${var.s3_code_bucket_name}" +} + +resource "aws_s3_object" "extract_lambda_code" { + bucket = aws_s3_bucket.s3_code_bucket_name.bucket + key = "${var.extract_lambda_name}/function_e.zip" + source = "${path.module}/../function_e.zip" +} \ No newline at end of file diff --git a/terraform/vars.tf b/terraform/vars.tf index 166f2c5..fa84222 100644 --- a/terraform/vars.tf +++ b/terraform/vars.tf @@ -8,6 +8,11 @@ variable "s3_transform_bucket_name" { default = "transform-bucket" } +variable "s3_code_bucket_name" { + type = string + default = "lambda-bucket" +} + variable "extract_lambda_name" { type = string default = "extract-lambda" -- cgit v1.2.3 From c75e650dbeb1390336d15487a2c87c53337cd8dc Mon Sep 17 00:00:00 2001 From: Ellie Date: Tue, 13 Aug 2024 11:25:33 +0100 Subject: infra(tf): add s3 policy for list & write --- terraform/iam.tf | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/terraform/iam.tf b/terraform/iam.tf index 7501373..b9919a5 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -1,5 +1,3 @@ -# define - resource "aws_iam_role" "bentley_service_role" { assume_role_policy = < Date: Tue, 13 Aug 2024 11:30:28 +0100 Subject: database connection added to func --- src/extract_lambda.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index e69de29..7d56c66 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -0,0 +1,32 @@ +from pg8000.native import Connection, Error, DatabaseError, InterfaceError +from dotenv import load_dotenv +import os + +load_dotenv() + +def extract(): + +# temporary credentials for dev- will not have access when uploaded + + database = os.getenv('database') + user = os.getenv('user') + password = os.getenv('password') + host = os.getenv('host') + port = os.getenv('port') + + + try: + db = Connection.run( + database=database, + user=user, + password=password, + host=host, + port=port + ) + except DatabaseError as e: + print(e) + except InterfaceError as i: + print(i) + + + \ No newline at end of file -- cgit v1.2.3 From 65e470c0bce51381da8f401f0ba07bd20a76071f Mon Sep 17 00:00:00 2001 From: Ellie Date: Tue, 13 Aug 2024 11:55:00 +0100 Subject: infra(tf): add wip write policy and attach policy --- terraform/iam.tf | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/terraform/iam.tf b/terraform/iam.tf index b9919a5..dda4d74 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -22,17 +22,12 @@ resource "aws_iam_role" "bentley_service_role" { EOF } +# lambda setup + + # s3 setup -# allows to list and retrieve s3 buckets, and allows retention/tagging/access control settings +# allows allows retention/tagging/access control settings data "aws_iam_policy_document" "s3_data_policy_doc" { - statement { - actions = [ - "s3:ListAllMyBuckets", - "s3:GetBucketLocation" - ] - resources = ["arn:aws:s3:::*"] - } - statement { actions = [ "s3:PutObject", @@ -41,8 +36,22 @@ data "aws_iam_policy_document" "s3_data_policy_doc" { "s3:PutObjectAcl" ] resources = [ - "${aws_s3_bucket.data_bucket.arn}/*", - "${aws_s3_bucket.code_bucket.arn}/*" + "${aws_s3_bucket.extract_bucket.arn}/*", + "${aws_s3_bucket.transform_bucket.arn}/*", + "${aws_s3_bucket.lambda_bucket.arn}/*", ] } -} \ No newline at end of file +} + +# write policy +resource "aws_iam_policy" "s3_policy" { + policy = data.aws_iam_policy_document.s3_data_policy_doc.json +} + +# attach policy to role +resource "aws_iam_role_policy_attachment" "s3_policy_attachment" { + role = aws_iam_role.bentley_service_role.name + policy_arn = aws_iam_policy.s3_policy.arn +} + +# lambda setup -- cgit v1.2.3 From 936eee1eb44d8bfdbd148d22b749966e9606fb46 Mon Sep 17 00:00:00 2001 From: Ellie Date: Tue, 13 Aug 2024 11:58:02 +0100 Subject: infra(tf): add wip lambda role --- terraform/iam.tf | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/terraform/iam.tf b/terraform/iam.tf index dda4d74..10b8749 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -23,6 +23,9 @@ resource "aws_iam_role" "bentley_service_role" { } # lambda setup +resource "aws_iam_role" "lambda_role" { + assume_role_policy = data.aws_iam_policy_document.bentley_service_role.json +} # s3 setup @@ -44,14 +47,14 @@ data "aws_iam_policy_document" "s3_data_policy_doc" { } # write policy -resource "aws_iam_policy" "s3_policy" { +resource "aws_iam_policy" "s3_write_policy" { policy = data.aws_iam_policy_document.s3_data_policy_doc.json } # attach policy to role resource "aws_iam_role_policy_attachment" "s3_policy_attachment" { - role = aws_iam_role.bentley_service_role.name - policy_arn = aws_iam_policy.s3_policy.arn + role = aws_iam_role.lambda_role.name + policy_arn = aws_iam_policy.s3_write_policy.arn } # lambda setup -- cgit v1.2.3 From eb09f0f6a42e2a2ce9529492a47a34f782ffad53 Mon Sep 17 00:00:00 2001 From: Ellie Date: Tue, 13 Aug 2024 12:17:52 +0100 Subject: infra(tf): clean-up code & init lambda iam setup --- terraform/iam.tf | 123 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 74 insertions(+), 49 deletions(-) diff --git a/terraform/iam.tf b/terraform/iam.tf index 10b8749..ecc63b1 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -1,60 +1,85 @@ -resource "aws_iam_role" "bentley_service_role" { - assume_role_policy = < Date: Tue, 13 Aug 2024 12:30:20 +0100 Subject: replacement for events.tf variable placeholders, s3 bucket notifications, lambda.tf set up (function, zip, permissions), code bucket to store zipped lambdas --- events - TBD?.tf | 52 +++++++++++++++++++++++++++++++++++++ events.tf | 52 ------------------------------------- terraform/events.tf | 23 ++++++++--------- terraform/lambda.tf | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++ terraform/s3.tf | 39 ++++++++++++++++++++++------ terraform/vars.tf | 5 ++++ 6 files changed, 173 insertions(+), 72 deletions(-) create mode 100644 events - TBD?.tf delete mode 100644 events.tf create mode 100644 terraform/lambda.tf diff --git a/events - TBD?.tf b/events - TBD?.tf new file mode 100644 index 0000000..25fb35b --- /dev/null +++ b/events - TBD?.tf @@ -0,0 +1,52 @@ +resource "aws_cloudwatch_event_rule" "lambda_trigger" { + name = "lambda-scheduled-trigger" + description = "Schedule to trigger the Lambda function" + schedule_expression = "rate(30 minutes)" + +# event_pattern = jsonencode({ +# detail-type = [ +# "AWS Console Sign In via CloudTrail" +# ] +# }) +} + + +resource "aws_cloudwatch_event_target" "lambda" { + rule = aws_cloudwatch_event_rule.lambda_trigger.name + target_id = "TargetFunctionV1" + arn = aws_lambda_function.my_lambda_function.arn +} + + + +resource "aws_lambda_permission" "allow_eventbridge" { + statement_id = "AllowExecutionFromEventBridge" + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.my_lambda_function.function_name + principal = "events.amazonaws.com" + source_arn = aws_cloudwatch_event_rule.lambda_trigger.arn +} + + +# below is step function 1 +resource "aws_lambda_permission" "allow_s3_ingestion" { + statement_id = "AllowS3InvokeLambdaTransform" + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.lambda_transform.function_name + principal = "s3.amazonaws.com" + source_arn = aws_s3_bucket.extract.arn +} + + +resource "aws_s3_bucket_notification" "extract_bucket_notification" { + bucket = aws_s3_bucket.extract.id + + lambda_function { + events = ["s3:ObjectCreated:*"] + lambda_function_arn = aws_lambda_function.lambda_transform.arn + } + + depends_on = [aws_lambda_permission.allow_s3_ingestion] +} + +# need to duplicate and replace "2" with "3" \ No newline at end of file diff --git a/events.tf b/events.tf deleted file mode 100644 index 25fb35b..0000000 --- a/events.tf +++ /dev/null @@ -1,52 +0,0 @@ -resource "aws_cloudwatch_event_rule" "lambda_trigger" { - name = "lambda-scheduled-trigger" - description = "Schedule to trigger the Lambda function" - schedule_expression = "rate(30 minutes)" - -# event_pattern = jsonencode({ -# detail-type = [ -# "AWS Console Sign In via CloudTrail" -# ] -# }) -} - - -resource "aws_cloudwatch_event_target" "lambda" { - rule = aws_cloudwatch_event_rule.lambda_trigger.name - target_id = "TargetFunctionV1" - arn = aws_lambda_function.my_lambda_function.arn -} - - - -resource "aws_lambda_permission" "allow_eventbridge" { - statement_id = "AllowExecutionFromEventBridge" - action = "lambda:InvokeFunction" - function_name = aws_lambda_function.my_lambda_function.function_name - principal = "events.amazonaws.com" - source_arn = aws_cloudwatch_event_rule.lambda_trigger.arn -} - - -# below is step function 1 -resource "aws_lambda_permission" "allow_s3_ingestion" { - statement_id = "AllowS3InvokeLambdaTransform" - action = "lambda:InvokeFunction" - function_name = aws_lambda_function.lambda_transform.function_name - principal = "s3.amazonaws.com" - source_arn = aws_s3_bucket.extract.arn -} - - -resource "aws_s3_bucket_notification" "extract_bucket_notification" { - bucket = aws_s3_bucket.extract.id - - lambda_function { - events = ["s3:ObjectCreated:*"] - lambda_function_arn = aws_lambda_function.lambda_transform.arn - } - - depends_on = [aws_lambda_permission.allow_s3_ingestion] -} - -# need to duplicate and replace "2" with "3" \ No newline at end of file diff --git a/terraform/events.tf b/terraform/events.tf index 4d68a23..0196dc3 100644 --- a/terraform/events.tf +++ b/terraform/events.tf @@ -11,18 +11,17 @@ resource "aws_cloudwatch_event_rule" "lambda_trigger" { } -resource "aws_cloudwatch_event_target" "lambda" { +resource "aws_cloudwatch_event_target" "extract_lambda_cw_event" { rule = aws_cloudwatch_event_rule.lambda_trigger.name target_id = "TargetFunctionV1" - arn = aws_lambda_function.my_lambda_function.arn + arn = aws_lambda_function.extract_lambda.arn #replaced lambda name placeholder } - resource "aws_lambda_permission" "allow_eventbridge" { statement_id = "AllowExecutionFromEventBridge" action = "lambda:InvokeFunction" - function_name = aws_lambda_function.my_lambda_function.function_name + function_name = aws_lambda_function.extract_lambda.function_name #replaced lambda name placeholder principal = "events.amazonaws.com" source_arn = aws_cloudwatch_event_rule.lambda_trigger.arn } @@ -32,18 +31,18 @@ resource "aws_lambda_permission" "allow_eventbridge" { resource "aws_lambda_permission" "allow_s3_ingestion" { statement_id = "AllowS3InvokeLambdaTransform" action = "lambda:InvokeFunction" - function_name = aws_lambda_function.lambda_transform.function_name + function_name = aws_lambda_function.transform_lambda.function_name #replaced lambda name placeholder principal = "s3.amazonaws.com" - source_arn = aws_s3_bucket.extract.arn + source_arn = aws_s3_bucket.extract_bucket.arn #replaced bucket name placeholder } resource "aws_s3_bucket_notification" "extract_bucket_notification" { - bucket = aws_s3_bucket.extract.id + bucket = aws_s3_bucket.extract_bucket.id #replaced bucket name placeholder lambda_function { events = ["s3:ObjectCreated:*"] - lambda_function_arn = aws_lambda_function.lambda_transform.arn + lambda_function_arn = aws_lambda_function.transform_lambda.arn #replaced lambda name placeholder } depends_on = [aws_lambda_permission.allow_s3_ingestion] @@ -54,18 +53,18 @@ resource "aws_s3_bucket_notification" "extract_bucket_notification" { resource "aws_lambda_permission" "allow_s3_transfrom_bucket" { statement_id = "AllowS3InvokeLambdaTransform" action = "lambda:InvokeFunction" - function_name = aws_lambda_function.lambda_transform.function_name + function_name = aws_lambda_function.transform_lambda.function_name #replaced lambda name placeholder principal = "s3.amazonaws.com" - source_arn = aws_s3_bucket.transform.arn + source_arn = aws_s3_bucket.transform_bucket.arn #replaced bucket name placeholder } resource "aws_s3_bucket_notification" "transform_bucket_notification" { - bucket = aws_s3_bucket.transform.id + bucket = aws_s3_bucket.transform_bucket.id #replaced bucket name placeholder lambda_function { events = ["s3:ObjectCreated:*"] - lambda_function_arn = aws_lambda_function.lambda_transform.arn + lambda_function_arn = aws_lambda_function.transform_lambda.arn #replaced lambda name placeholder } depends_on = [aws_lambda_permission.allow_s3_transform] diff --git a/terraform/lambda.tf b/terraform/lambda.tf new file mode 100644 index 0000000..09d6697 --- /dev/null +++ b/terraform/lambda.tf @@ -0,0 +1,74 @@ +### EXTRACT LAMBDA SET UP +data "archive_file" "extract_lambda_zip" { + type = "zip" + source_file = "${path.module}/../src/extract_lambda.py" + output_path = "${path.module}/../extract_function.zip" +} + +resource "aws_lambda_function" "extract_lambda" { + function_name = "${var.extract_lambda_name}" + s3_bucket = aws_s3_bucket.lambda_bucket.bucket + s3_key = "extract_lambda/extract_function.zip" + role = aws_iam_role.PLACEHOLDER_extract_lambda_role.arn # << lambda role placehodler + handler = "extract_lambda.lambda_handler" # << check that the function is called lambda handler + runtime = "python3.11" + environment { + variables = { + output = aws_s3_bucket.extract_bucket.bucket + } + } +} + +resource "aws_lambda_permission" "allow_to_write_to_s3_extract_bucket" { + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.extract_lambda.function_name + principal = "s3.amazonaws.com" + source_arn = aws_s3_bucket.extract_bucket.arn +} + + +### TRANSFORM LAMBDA SET UP +data "archive_file" "transform_lambda_zip" { + type = "zip" + source_file = "${path.module}/../src/transform_lambda.py" + output_path = "${path.module}/../transform_function.zip" +} + +resource "aws_lambda_function" "transform_lambda" { + function_name = "${var.transform_lambda_name}" + s3_bucket = aws_s3_bucket.lambda_bucket.bucket + s3_key = "transform_lambda/transform_function.zip" + role = aws_iam_role.PLACEHOLDER_transform_lambda_role.arn # << lambda role placehodler + handler = "transform_lambda.lambda_handler" # << check that the function is called lambda handler + runtime = "python3.11" + environment { + variables = { + output = aws_s3_bucket.transform_bucket.bucket + } + } +} + +resource "aws_lambda_permission" "allow_to_write_to_s3_transform_bucket" { + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.transform_lambda.function_name + principal = "s3.amazonaws.com" + source_arn = aws_s3_bucket.transform_bucket.arn +} + + +### LOAD LAMBDA SET UP +data "archive_file" "load_lambda_zip" { + type = "zip" + source_file = "${path.module}/../src/load_lambda.py" + output_path = "${path.module}/../load_function.zip" +} + +resource "aws_lambda_function" "load_lambda" { + function_name = "${var.load_lambda_name}" + s3_bucket = aws_s3_bucket.lambda_bucket.bucket + s3_key = "load_lambda/load_function.zip" + role = aws_iam_role.PLACEHOLDER_load_lambda_role.arn # << lambda role placehodler + handler = "load_lambda.lambda_handler" # << check that the function is called lambda handler + runtime = "python3.11" +} + diff --git a/terraform/s3.tf b/terraform/s3.tf index bfe891e..8cb65ef 100644 --- a/terraform/s3.tf +++ b/terraform/s3.tf @@ -1,17 +1,40 @@ +### EXTRACT BUCKET SET-UP resource "aws_s3_bucket" "extract_bucket" { bucket = "${var.s3_extract_bucket_name}" } +resource "aws_s3_object" "extract_lambda_code" { + bucket = aws_s3_bucket.s3_code_bucket_name.bucket + key = "${var.extract_lambda_name}/extract_function.zip" + source = "${path.module}/../extract_function.zip" +} # << can't figure out how this is being used but we seem to need it + +resource "aws_s3_bucket_notification" "extract_bucket_notification" { + bucket = aws_s3_bucket.extract_bucket.id + lambda_function { + lambda_function_arn = aws_lambda_function.extract_lambda.arn + events = ["s3:ObjectCreated:*"] + } + depends_on = [aws_lambda_permission.allow_to_write_to_s3_extract_bucket] +} # << is this the correct permission dependency? + + +### TRANSFORM BUCKET SET-UP resource "aws_s3_bucket" "transform_bucket" { bucket = "${var.s3_transform_bucket_name}" } -resource "aws_s3_bucket" "lambda_bucket" { - bucket = "${var.s3_code_bucket_name}" -} - -resource "aws_s3_object" "extract_lambda_code" { +resource "aws_s3_object" "transform_lambda_code" { bucket = aws_s3_bucket.s3_code_bucket_name.bucket - key = "${var.extract_lambda_name}/function_e.zip" - source = "${path.module}/../function_e.zip" -} \ No newline at end of file + key = "${var.transform_lambda_name}/transform_function.zip" + source = "${path.module}/../transform_function.zip" +} # << can't figure out how this is being used but we seem to need it + +resource "aws_s3_bucket_notification" "transform_bucket_notification" { + bucket = aws_s3_bucket.transform_bucket.id + lambda_function { + lambda_function_arn = aws_lambda_function.transform_lambda.arn + events = ["s3:ObjectCreated:*"] + } + depends_on = [aws_lambda_permission.allow_to_write_to_s3_transform_bucket] +} # << is this the correct permission dependency? diff --git a/terraform/vars.tf b/terraform/vars.tf index fa84222..cc9348a 100644 --- a/terraform/vars.tf +++ b/terraform/vars.tf @@ -23,6 +23,11 @@ variable "transform_lambda_name" { default = "transform-lambda" } +variable "load_lambda_name" { + type = string + default = "load-lambda" +} + data "aws_caller_identity" "current" {} data "aws_region" "current" {} \ No newline at end of file -- cgit v1.2.3 From 974a8018f79d8592cbd6a59b1b26a9d288975328 Mon Sep 17 00:00:00 2001 From: T-Aji Date: Tue, 13 Aug 2024 12:30:42 +0100 Subject: dumps data to csv --- src/extract_lambda.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 7d56c66..8317ef8 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -1,22 +1,25 @@ from pg8000.native import Connection, Error, DatabaseError, InterfaceError from dotenv import load_dotenv import os +import boto3 +import csv +from botocore.exceptions import ClientError load_dotenv() -def extract(): +def lambda_handler(event, context): + client = boto3.client('s3') # temporary credentials for dev- will not have access when uploaded - + database = os.getenv('database') user = os.getenv('user') password = os.getenv('password') host = os.getenv('host') port = os.getenv('port') - try: - db = Connection.run( + db = Connection( database=database, user=user, password=password, @@ -27,6 +30,25 @@ def extract(): print(e) except InterfaceError as i: print(i) - + #replace prints with upload to cloudwatch logs + + tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';") + for table in tables: + table_name = table[0] + rows = db.run(f"SELECT * FROM {table_name};") + # this saves the csv files to the repo root before writing to s3, this is unnecessary. how will the lambda behave when it attempts to save files? + with open(f"{table_name}.csv", "w", newline='') as file: + writer = csv.writer(file) + writer.writerow([desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")]) + writer.writerows(rows) + try: + client.upload_file(file, Bucket='ingestion-bucket', Object_name=table_name) + + except ClientError as e: + print(e) + #replace print with upload to cloudwatch logs + + if db: + db.close() \ No newline at end of file -- cgit v1.2.3 From 3c824df60374380d044cb9181672fa76b610d84f Mon Sep 17 00:00:00 2001 From: Ellie Date: Tue, 13 Aug 2024 12:30:53 +0100 Subject: infra(tf): clean-up code --- terraform/iam.tf | 69 +++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/terraform/iam.tf b/terraform/iam.tf index ecc63b1..bb8d932 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -4,7 +4,7 @@ ######################################################################## # DEFINE MULTI-SERVICE ROLE (lambda, s3, cloudwatch, events) -resource "aws_iam_role" "multi_service_role" { +resource "aws_iam_role" "bentley_multi_service_role" { name = "multi_service_role" assume_role_policy = jsonencode({ @@ -61,6 +61,61 @@ resource "aws_iam_policy" "s3_access_policy" { ) } +######################################################################## +# LAMBDA SETUP +# Description: Allows Lambda permission to write to Cloudwatch logs +######################################################################## + +resource "aws_iam_policy" "lambda_execution_policy" { + name = "lambda_execution_policy" + path = "/" + description = "IAM policy for Lambda execution" + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Action = [ + "lambda:InvokeFunction", + "lambda:GetFunction" + ] + Resource = "*" + } + ] + } + ) +} + +######################################################################## +# CLOUDWATCH SETUP +# Description: Give permission for Lambda to write to CloudWatch logs +######################################################################## + +data "aws_iam_policy_document" "cw_document" { + statement { + actions = ["logs:CreateLogGroup"] + resources = [ + "arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:*" + ] + } + + statement { + actions = [ + "logs:CreateLogStream", + "logs:CreateLogGroup", + "logs:PutLogEvents" + ] + resources = [ + "arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:/aws/lambda/*" + ] + } +} + +######################################################################## +# POLICY WRITE & ATTACH +######################################################################## + # S3 WRITE POLICY resource "aws_iam_policy" "s3_write_policy" { policy = data.aws_iam_policy_document.s3_data_policy_doc.json @@ -70,16 +125,4 @@ resource "aws_iam_policy" "s3_write_policy" { resource "aws_iam_role_policy_attachment" "lambda_s3_policy_attachment" { role = aws_iam_role.lambda_role.name policy_arn = aws_iam_policy.s3_write_policy.arn -} - -######################################################################## -# LAMBDA SETUP -# Description: Allows Lambda permission to write to Cloudwatch logs -######################################################################## - - - -# Uses Iam policy document to assume role for lambda functions -resource "aws_iam_role" "lambda_role" { - assume_role_policy = data.aws_iam_policy_document.bentley_service_role.json } \ No newline at end of file -- cgit v1.2.3 From cdb4577b5ad7ae1f708797de6bbf17e289bfac14 Mon Sep 17 00:00:00 2001 From: T-Aji Date: Tue, 13 Aug 2024 15:32:33 +0100 Subject: feat/ add logging & split task into 3 helper functions --- src/extract_lambda.py | 140 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 109 insertions(+), 31 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 8317ef8..11ea5d1 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -1,54 +1,132 @@ -from pg8000.native import Connection, Error, DatabaseError, InterfaceError +from pg8000.native import Connection, DatabaseError, InterfaceError from dotenv import load_dotenv import os import boto3 import csv from botocore.exceptions import ClientError +import logging +import json +logger = logging.getLogger() +logger.setLevel(logging.INFO) load_dotenv() -def lambda_handler(event, context): - client = boto3.client('s3') -# temporary credentials for dev- will not have access when uploaded +database = os.getenv('database') +user = os.getenv('user') +password = os.getenv('password') +host = os.getenv('host') +port = os.getenv('port') + +def lambda_handler(event, context): + """This lambda function connects to the Totesys database, lists the contents of the ingestion bucket, + and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them + it uses 3 helper functions to achieve these 3 functionalities + """ + try: + db = connect_to_database() + existing_files = list_existing_s3_files() + any_changes = process_and_upload_tables(db, existing_files) + + if not any_changes: + logger.info("No changes detected in the database.") + return { + 'statusCode': 200, + 'body': json.dumps('No changes detected, no CSV files were uploaded.') + } + else: + return { + 'statusCode': 200, + 'body': json.dumps('CSV files processed and uploaded successfully.') + } + + except Exception as e: + logger.error(f'Error: {e}') + return { + 'statusCode': 500, + 'body': json.dumps('Internal server error.') + } - database = os.getenv('database') - user = os.getenv('user') - password = os.getenv('password') - host = os.getenv('host') - port = os.getenv('port') + finally: + + if db: + db.close() +def connect_to_database(): try: - db = Connection( - database=database, - user=user, - password=password, - host=host, - port=port + return Connection( + database=database, + user=user, + password=password, + host=host, + port=port ) except DatabaseError as e: - print(e) + logger.error(f'Database error: {e}') + raise except InterfaceError as i: - print(i) - #replace prints with upload to cloudwatch logs + logger.error(f'Interface error: {i}') + raise + + +def list_existing_s3_files(): + """Creates a dictionary and populates it with the + results of listing the contents of the s3 bucket, then + returns the populated dictionary + """ + client = boto3.client('s3') + existing_files = {} + + try: + response = client.list_objects_v2(Bucket=ingestion_bucket) + + if 'Contents' in response: + for obj in response['Contents']: + s3_key = obj['Key'] + try: + file_obj = client.get_object(Bucket=ingestion_bucket, Key=s3_key) + file_content = file_obj['Body'].read().decode('utf-8') + existing_files[s3_key] = file_content + except ClientError as e: + logger.error(f'Error retrieving S3 object {s3_key}: {e}') + + except ClientError as e: + logger.error(f'Error listing S3 objects: {e}') + + return existing_files + + + +def process_and_upload_tables(db, existing_files): + """Creates a list of the tables from a database query and + then selects everything from each table in individual queries + it then writes each table to CSV files and compares with the item + in the existing_files dictionary with the same name. If it finds sny changes + to files, or new tables/files it uploads them to the s3 bucket + """ + client = boto3.client('s3') tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';") + for table in tables: table_name = table[0] rows = db.run(f"SELECT * FROM {table_name};") - # this saves the csv files to the repo root before writing to s3, this is unnecessary. how will the lambda behave when it attempts to save files? - with open(f"{table_name}.csv", "w", newline='') as file: + + + csv_file_path = f"/tmp/{table_name}.csv" + with open(csv_file_path, "w", newline='') as file: writer = csv.writer(file) - writer.writerow([desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")]) + column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] + writer.writerow(column_names) writer.writerows(rows) - try: - client.upload_file(file, Bucket='ingestion-bucket', Object_name=table_name) - - except ClientError as e: - print(e) - #replace print with upload to cloudwatch logs - - if db: - db.close() + + s3_key = f"{table_name}/latest.csv" + new_csv_content = open(csv_file_path, "r").read() + - \ No newline at end of file + if s3_key not in existing_files or existing_files[s3_key] != new_csv_content: + try: + client.upload_file(csv_file_path, ingestion_bucket, s3_key) + logger.info(f"Uploaded {s3_key} to S3.") + except ClientError as e: + logger.error(f'Error uploading to S3: {e}') \ No newline at end of file -- cgit v1.2.3 From bb1665fd08d8abf10930875272bdc2b7f8a4c681 Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Tue, 13 Aug 2024 16:25:06 +0100 Subject: rds.tf file with some placehodlers. Additional set up is required --- terraform/lambda.tf | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++ terraform/rds.tf | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++ terraform/vars.tf | 10 +++++++ 3 files changed, 162 insertions(+) create mode 100644 terraform/lambda.tf create mode 100644 terraform/rds.tf diff --git a/terraform/lambda.tf b/terraform/lambda.tf new file mode 100644 index 0000000..09d6697 --- /dev/null +++ b/terraform/lambda.tf @@ -0,0 +1,74 @@ +### EXTRACT LAMBDA SET UP +data "archive_file" "extract_lambda_zip" { + type = "zip" + source_file = "${path.module}/../src/extract_lambda.py" + output_path = "${path.module}/../extract_function.zip" +} + +resource "aws_lambda_function" "extract_lambda" { + function_name = "${var.extract_lambda_name}" + s3_bucket = aws_s3_bucket.lambda_bucket.bucket + s3_key = "extract_lambda/extract_function.zip" + role = aws_iam_role.PLACEHOLDER_extract_lambda_role.arn # << lambda role placehodler + handler = "extract_lambda.lambda_handler" # << check that the function is called lambda handler + runtime = "python3.11" + environment { + variables = { + output = aws_s3_bucket.extract_bucket.bucket + } + } +} + +resource "aws_lambda_permission" "allow_to_write_to_s3_extract_bucket" { + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.extract_lambda.function_name + principal = "s3.amazonaws.com" + source_arn = aws_s3_bucket.extract_bucket.arn +} + + +### TRANSFORM LAMBDA SET UP +data "archive_file" "transform_lambda_zip" { + type = "zip" + source_file = "${path.module}/../src/transform_lambda.py" + output_path = "${path.module}/../transform_function.zip" +} + +resource "aws_lambda_function" "transform_lambda" { + function_name = "${var.transform_lambda_name}" + s3_bucket = aws_s3_bucket.lambda_bucket.bucket + s3_key = "transform_lambda/transform_function.zip" + role = aws_iam_role.PLACEHOLDER_transform_lambda_role.arn # << lambda role placehodler + handler = "transform_lambda.lambda_handler" # << check that the function is called lambda handler + runtime = "python3.11" + environment { + variables = { + output = aws_s3_bucket.transform_bucket.bucket + } + } +} + +resource "aws_lambda_permission" "allow_to_write_to_s3_transform_bucket" { + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.transform_lambda.function_name + principal = "s3.amazonaws.com" + source_arn = aws_s3_bucket.transform_bucket.arn +} + + +### LOAD LAMBDA SET UP +data "archive_file" "load_lambda_zip" { + type = "zip" + source_file = "${path.module}/../src/load_lambda.py" + output_path = "${path.module}/../load_function.zip" +} + +resource "aws_lambda_function" "load_lambda" { + function_name = "${var.load_lambda_name}" + s3_bucket = aws_s3_bucket.lambda_bucket.bucket + s3_key = "load_lambda/load_function.zip" + role = aws_iam_role.PLACEHOLDER_load_lambda_role.arn # << lambda role placehodler + handler = "load_lambda.lambda_handler" # << check that the function is called lambda handler + runtime = "python3.11" +} + diff --git a/terraform/rds.tf b/terraform/rds.tf new file mode 100644 index 0000000..4b25c5f --- /dev/null +++ b/terraform/rds.tf @@ -0,0 +1,78 @@ +data "aws_availability_zones" "available" {} + +module "vpc" { + source = "terraform-aws-modules/vpc/aws" + version = "2.77.0" + + name = "${var.project_name}" + cidr = "10.0.0.0/16" + azs = data.aws_availability_zones.available.names + public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"] + enable_dns_hostnames = true + enable_dns_support = true +} + +resource "aws_db_subnet_group" "Terrific-Totes-sub-gr" { + name = "TT-db-subnet" + subnet_ids = module.vpc.public_subnets + + tags = { + Name = "${var.project_name}" + } +} + +resource "aws_security_group" "rds" { + name = "${var.project_name}-rds" + vpc_id = module.vpc.vpc_id + + ingress { + from_port = 5432 + to_port = 5432 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + egress { + from_port = 5432 + to_port = 5432 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + tags = { + Name = "${var.project_name}-rds" + } +} + +resource "aws_db_parameter_group" "Terrific-Totes-param-gr" { + name = "TT-db-param" + family = "postgres14" + + parameter { + name = "log_connections" + value = "1" + } +} + +resource "aws_db_instance" "Terrific-Totes-rds" { + db_name = "${var.project_name}" + instance_class = "db.t3.micro" + allocated_storage = 5 + engine = "postgres" + engine_version = "14.1" + username = "user credentials for the root user" # we could use .env here + password = "user password for the root user" # we could use .env here + ### alternatively to providing username nad password we can specify: +# resource "aws_kms_key" "example_key" { +# description = "Example KMS Key" +# } +# within the resource: +# manage_master_user_password = true +# master_user_secret_kms_key_id = aws_kms_key.example.key_id +# } + db_subnet_group_name = aws_db_subnet_group.Terrific-Totes-sub-gr.name + vpc_security_group_ids = [aws_security_group.rds.id] + parameter_group_name = aws_db_parameter_group.Terrific-Totes-param-gr.name + publicly_accessible = false + skip_final_snapshot = true +} \ No newline at end of file diff --git a/terraform/vars.tf b/terraform/vars.tf index fa84222..350c2c6 100644 --- a/terraform/vars.tf +++ b/terraform/vars.tf @@ -23,6 +23,16 @@ variable "transform_lambda_name" { default = "transform-lambda" } +variable "load_lambda_name" { + type = string + default = "load-lambda" +} + +variable "project_name" { + type = string + default = "Terrific-Totes" +} + data "aws_caller_identity" "current" {} data "aws_region" "current" {} \ No newline at end of file -- cgit v1.2.3 From 6c7914a9d33fbaa962cf1f083c2ee79ace62f401 Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Tue, 13 Aug 2024 16:33:56 +0100 Subject: restore load_lambda script --- src/load_lambda.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/src/load_lambda.py b/src/load_lambda.py index e69de29..5c6718c 100644 --- a/src/load_lambda.py +++ b/src/load_lambda.py @@ -0,0 +1,52 @@ +### Example taken from https://medium.com/@pranay1001090/how-to-load-data-from-amazon-s3-csv-parquet-to-aws-rds-using-python-3dc51dd2186e + +### THIS IS AN EXAMPLE CODE WE CAN PICK FROM, NONE OF THIS HAS BEEN CUSTOMISED YET + +import boto3 +import pandas as pd +import pyarrow.parquet as pq +from io import BytesIO +from sqlalchemy import create_engine + +# AWS credentials and region +aws_access_key = '' +aws_secret_key = '' +region_name = '' + +# S3 bucket and file details +bucket_name = '' +file_prefix = '' +s3_client = boto3.client('s3', aws_access_key_id=aws_access_key, aws_secret_access_key=aws_secret_key, region_name=region_name) + +# RDS connection details +database_name = '' +table_name = '' +rds_host = '' +rds_port = '' +rds_user = '' +rds_password = '' +# Function to load Parquet files into a Pandas DataFrame +def load_parquet_data(s3_bucket, s3_prefix): + file_objects = s3_client.list_objects_v2(Bucket=s3_bucket, Prefix=s3_prefix)['Contents'] + dfs = [] + for file_object in file_objects: + file_key = file_object['Key'] + file_obj = s3_client.get_object(Bucket=s3_bucket, Key=file_key) + parquet_file = pq.ParquetFile(BytesIO(file_obj['Body'].read())) + df = parquet_file.read().to_pandas() + dfs.append(df) + return pd.concat(dfs) + +# Load Parquet data from S3 into a Pandas DataFrame +df = load_parquet_data(bucket_name, file_prefix) +# Connect to RDS +conn_str = f'mysql+pymysql://{rds_user}:{rds_password}@{rds_host}:{rds_port}/{database_name}' +engine = create_engine(conn_str) + +# Write the DataFrame to RDS +df.to_sql(table_name, con=engine, if_exists='replace', index=False) + +# Closing the connection +engine.dispose() + +print('Data loaded successfully!') \ No newline at end of file -- cgit v1.2.3 From 68a0b4740e1aab2c507547ab985c7c1dc436d9c9 Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Tue, 13 Aug 2024 17:16:12 +0100 Subject: wip: running terraform apply to continue fixing terraform infrastructure --- .gitignore | 5 +++++ terraform/events.tf | 4 ++-- terraform/iam.tf | 64 ++++++++++++++++++++++++++++------------------------- terraform/lambda.tf | 13 +++++------ terraform/s3.tf | 63 ++++++++++++++++++++++++++++++---------------------- 5 files changed, 84 insertions(+), 65 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5861f48 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.tfstate +*.tfstate.* +*.tfvars +*.tfvars.json +.terraform.tfstate.lock.info \ No newline at end of file diff --git a/terraform/events.tf b/terraform/events.tf index 0196dc3..7a6b0ad 100644 --- a/terraform/events.tf +++ b/terraform/events.tf @@ -50,7 +50,7 @@ resource "aws_s3_bucket_notification" "extract_bucket_notification" { ###### -resource "aws_lambda_permission" "allow_s3_transfrom_bucket" { +resource "aws_lambda_permission" "allow_s3_transform_bucket" { statement_id = "AllowS3InvokeLambdaTransform" action = "lambda:InvokeFunction" function_name = aws_lambda_function.transform_lambda.function_name #replaced lambda name placeholder @@ -67,5 +67,5 @@ resource "aws_s3_bucket_notification" "transform_bucket_notification" { lambda_function_arn = aws_lambda_function.transform_lambda.arn #replaced lambda name placeholder } - depends_on = [aws_lambda_permission.allow_s3_transform] + depends_on = [aws_lambda_permission.allow_s3_transform_bucket] } \ No newline at end of file diff --git a/terraform/iam.tf b/terraform/iam.tf index bb8d932..f34d58a 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -4,7 +4,7 @@ ######################################################################## # DEFINE MULTI-SERVICE ROLE (lambda, s3, cloudwatch, events) -resource "aws_iam_role" "bentley_multi_service_role" { +resource "aws_iam_role" "multi_service_role" { name = "multi_service_role" assume_role_policy = jsonencode({ @@ -16,7 +16,7 @@ resource "aws_iam_role" "bentley_multi_service_role" { Principal = { Service = [ "lambda.amazonaws.com", - "states.amazonaws.com", + "cloudwatch.amazonaws.com", "events.amazonaws.com", "s3.amazonaws.com" ] @@ -27,7 +27,6 @@ resource "aws_iam_role" "bentley_multi_service_role" { } - ######################################################################## # S3 SETUP # Description: allows allows retention/tagging/access control settings @@ -35,32 +34,23 @@ resource "aws_iam_role" "bentley_multi_service_role" { ######################################################################## # S3 DEFINE POLICY -resource "aws_iam_policy" "s3_access_policy" { - name = "s3_access_policy" - path = "/" - description = "IAM policy for S3 access" - - policy = jsonencode({ - Version = "2012-10-17" - Statement = [ - { - Effect = "Allow" - Action = [ - "s3:PutObject", - "s3:GetObject", - "s3:ListBucket" - ] - resources = [ - "${aws_s3_bucket.extract_bucket.arn}/*", - "${aws_s3_bucket.transform_bucket.arn}/*", - "${aws_s3_bucket.lambda_bucket.arn}/*" - ] - } - ] - } - ) +data "aws_iam_policy_document" "s3_data_policy_doc" { + statement { + actions = [ + "s3:PutObject", + "s3:PutObjectRetention", + "s3:PutObjectTagging", + "s3:PutObjectAcl" + ] + resources = [ + "${aws_s3_bucket.extract_bucket.arn}/*", + "${aws_s3_bucket.transform_bucket.arn}/*", + "${aws_s3_bucket.lambda_code_bucket.arn}/*", + ] + } } + ######################################################################## # LAMBDA SETUP # Description: Allows Lambda permission to write to Cloudwatch logs @@ -112,6 +102,11 @@ data "aws_iam_policy_document" "cw_document" { } } +resource "aws_iam_policy" "cw_policy" { + name = "cw_policy" + policy = data.aws_iam_policy_document.cw_document.json +} + ######################################################################## # POLICY WRITE & ATTACH ######################################################################## @@ -123,6 +118,15 @@ resource "aws_iam_policy" "s3_write_policy" { # S3 ATTACH POLICY resource "aws_iam_role_policy_attachment" "lambda_s3_policy_attachment" { - role = aws_iam_role.lambda_role.name - policy_arn = aws_iam_policy.s3_write_policy.arn -} \ No newline at end of file + for_each = toset([ + aws_iam_policy.s3_write_policy.arn, + aws_iam_policy.lambda_execution_policy.arn, + aws_iam_policy.cw_policy.arn + ]) + role = aws_iam_role.multi_service_role.name + policy_arn = each.value +} + +################ +# RDS POLICIES # +################ diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 09d6697..bcbf394 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -7,9 +7,9 @@ data "archive_file" "extract_lambda_zip" { resource "aws_lambda_function" "extract_lambda" { function_name = "${var.extract_lambda_name}" - s3_bucket = aws_s3_bucket.lambda_bucket.bucket + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket s3_key = "extract_lambda/extract_function.zip" - role = aws_iam_role.PLACEHOLDER_extract_lambda_role.arn # << lambda role placehodler + role = aws_iam_role.multi_service_role.arn #<< lambda role placehodler handler = "extract_lambda.lambda_handler" # << check that the function is called lambda handler runtime = "python3.11" environment { @@ -36,9 +36,9 @@ data "archive_file" "transform_lambda_zip" { resource "aws_lambda_function" "transform_lambda" { function_name = "${var.transform_lambda_name}" - s3_bucket = aws_s3_bucket.lambda_bucket.bucket + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket s3_key = "transform_lambda/transform_function.zip" - role = aws_iam_role.PLACEHOLDER_transform_lambda_role.arn # << lambda role placehodler + role = aws_iam_role.multi_service_role.arn # << lambda role placehodler handler = "transform_lambda.lambda_handler" # << check that the function is called lambda handler runtime = "python3.11" environment { @@ -55,7 +55,6 @@ resource "aws_lambda_permission" "allow_to_write_to_s3_transform_bucket" { source_arn = aws_s3_bucket.transform_bucket.arn } - ### LOAD LAMBDA SET UP data "archive_file" "load_lambda_zip" { type = "zip" @@ -65,9 +64,9 @@ data "archive_file" "load_lambda_zip" { resource "aws_lambda_function" "load_lambda" { function_name = "${var.load_lambda_name}" - s3_bucket = aws_s3_bucket.lambda_bucket.bucket + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket s3_key = "load_lambda/load_function.zip" - role = aws_iam_role.PLACEHOLDER_load_lambda_role.arn # << lambda role placehodler + role = aws_iam_role.multi_service_role.arn # << lambda role placehodler handler = "load_lambda.lambda_handler" # << check that the function is called lambda handler runtime = "python3.11" } diff --git a/terraform/s3.tf b/terraform/s3.tf index 8cb65ef..8ab5622 100644 --- a/terraform/s3.tf +++ b/terraform/s3.tf @@ -1,40 +1,51 @@ ### EXTRACT BUCKET SET-UP resource "aws_s3_bucket" "extract_bucket" { - bucket = "${var.s3_extract_bucket_name}" + bucket_prefix = "${var.s3_extract_bucket_name}-" } -resource "aws_s3_object" "extract_lambda_code" { - bucket = aws_s3_bucket.s3_code_bucket_name.bucket - key = "${var.extract_lambda_name}/extract_function.zip" - source = "${path.module}/../extract_function.zip" -} # << can't figure out how this is being used but we seem to need it - -resource "aws_s3_bucket_notification" "extract_bucket_notification" { - bucket = aws_s3_bucket.extract_bucket.id - lambda_function { - lambda_function_arn = aws_lambda_function.extract_lambda.arn - events = ["s3:ObjectCreated:*"] - } - depends_on = [aws_lambda_permission.allow_to_write_to_s3_extract_bucket] -} # << is this the correct permission dependency? - +# resource "aws_s3_bucket_notification" "extract_bucket_notification" { +# bucket = aws_s3_bucket.extract_bucket.id +# lambda_function { +# lambda_function_arn = aws_lambda_function.extract_lambda.arn +# events = ["s3:ObjectCreated:*"] +# } +# depends_on = [aws_lambda_permission.allow_to_write_to_s3_extract_bucket] +# } # << is this the correct permission dependency? ### TRANSFORM BUCKET SET-UP resource "aws_s3_bucket" "transform_bucket" { - bucket = "${var.s3_transform_bucket_name}" + bucket_prefix = "${var.s3_transform_bucket_name}-" } +# resource "aws_s3_bucket_notification" "transform_bucket_notification" { +# bucket = aws_s3_bucket.transform_bucket.id +# lambda_function { +# lambda_function_arn = aws_lambda_function.transform_lambda.arn +# events = ["s3:ObjectCreated:*"] +# } +# depends_on = [aws_lambda_permission.allow_to_write_to_s3_transform_bucket] +# } # << is this the correct permission dependency? + + +### LAMBDA BUCKET +resource "aws_s3_bucket" "lambda_code_bucket" { + bucket_prefix = "${var.s3_code_bucket_name}-" +} + +resource "aws_s3_object" "extract_lambda_code" { + bucket = aws_s3_bucket.lambda_code_bucket.bucket + key = "${var.extract_lambda_name}/extract_function.zip" + source = "${path.module}/../extract_function.zip" +} # << can't figure out how this is being used but we seem to need it + resource "aws_s3_object" "transform_lambda_code" { - bucket = aws_s3_bucket.s3_code_bucket_name.bucket + bucket = aws_s3_bucket.lambda_code_bucket.bucket key = "${var.transform_lambda_name}/transform_function.zip" source = "${path.module}/../transform_function.zip" } # << can't figure out how this is being used but we seem to need it -resource "aws_s3_bucket_notification" "transform_bucket_notification" { - bucket = aws_s3_bucket.transform_bucket.id - lambda_function { - lambda_function_arn = aws_lambda_function.transform_lambda.arn - events = ["s3:ObjectCreated:*"] - } - depends_on = [aws_lambda_permission.allow_to_write_to_s3_transform_bucket] -} # << is this the correct permission dependency? +resource "aws_s3_object" "load_lambda_code" { + bucket = aws_s3_bucket.lambda_code_bucket.bucket + key = "${var.load_lambda_name}/load_function.zip" + source = "${path.module}/../load_function.zip" +} \ No newline at end of file -- cgit v1.2.3 From 4f0d6f287ae83d7cdc0df6988ab7b9de10912f16 Mon Sep 17 00:00:00 2001 From: T-Aji Date: Wed, 14 Aug 2024 12:25:57 +0100 Subject: feat/passing tests to helper function list_existing_s3_files --- .gitignore | 3 +++ src/extract_lambda.py | 12 ++++++----- tests/dummy.txt | 1 + tests/test_extract_lambda.py | 49 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 5 deletions(-) create mode 100644 .gitignore create mode 100644 tests/dummy.txt create mode 100644 tests/test_extract_lambda.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..428f94e --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +venv +.env +__pycache__/ \ No newline at end of file diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 11ea5d1..dc70590 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -18,6 +18,7 @@ password = os.getenv('password') host = os.getenv('host') port = os.getenv('port') + def lambda_handler(event, context): """This lambda function connects to the Totesys database, lists the contents of the ingestion bucket, and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them @@ -69,27 +70,28 @@ def connect_to_database(): raise - -def list_existing_s3_files(): +def list_existing_s3_files(bucket_name='extract_bucket', client=boto3.client('s3')): """Creates a dictionary and populates it with the results of listing the contents of the s3 bucket, then returns the populated dictionary """ - client = boto3.client('s3') + existing_files = {} try: - response = client.list_objects_v2(Bucket=ingestion_bucket) + response = client.list_objects_v2(Bucket='extract_bucket') if 'Contents' in response: for obj in response['Contents']: s3_key = obj['Key'] try: - file_obj = client.get_object(Bucket=ingestion_bucket, Key=s3_key) + file_obj = client.get_object(Bucket=bucket_name, Key=s3_key) file_content = file_obj['Body'].read().decode('utf-8') existing_files[s3_key] = file_content except ClientError as e: logger.error(f'Error retrieving S3 object {s3_key}: {e}') + else: + logger.error('The bucket is empty') except ClientError as e: logger.error(f'Error listing S3 objects: {e}') diff --git a/tests/dummy.txt b/tests/dummy.txt new file mode 100644 index 0000000..af27ff4 --- /dev/null +++ b/tests/dummy.txt @@ -0,0 +1 @@ +This is a test file. \ No newline at end of file diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py new file mode 100644 index 0000000..472e93a --- /dev/null +++ b/tests/test_extract_lambda.py @@ -0,0 +1,49 @@ +import pytest +import boto3 +from moto import mock_aws +from src.extract_lambda import list_existing_s3_files #process_and_upload_tables +import os +import logging + + +@pytest.fixture(scope='class') +def aws_credentials(): + os.environ["AWS_ACCESS_KEY_ID"] = 'testing' + os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing' + os.environ["AWS_SECURIT_TOKEN"] = 'testing' + os.environ["AWS_SESSION_TOKEN"] = 'testing' + os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2' + +@pytest.fixture(scope='class') +def s3_client(aws_credentials): + with mock_aws(): + yield boto3.client('s3') + +class TestListExistings3Files(): + def test_error_if_no_bucket(self, s3_client, caplog): + + logger = logging.getLogger() + logger.info('Testing now.') + caplog.set_level(logging.ERROR) + list_existing_s3_files(client=s3_client) + assert 'Error listing S3 objects' in caplog.text + + def test_error_if_bucket_is_empty(self, s3_client, caplog): + + s3_client.create_bucket(Bucket='extract_bucket', + CreateBucketConfiguration={ + 'LocationConstraint': 'eu-west-2' + }) + list_existing_s3_files(client=s3_client) + assert 'The bucket is empty' in caplog.text + + def test_error_retrieving_object(self, s3_client, caplog): + s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt') + list_existing_s3_files(bucket_name='test_bucket', client=s3_client) + + assert 'Error retrieving S3 object ' in caplog.text + + def test_retrieves_file_content(self, s3_client, caplog): + result = list_existing_s3_files(client=s3_client) + + assert list(result.values()) == ['This is a test file.'] \ No newline at end of file -- cgit v1.2.3 From 0e3faa19dec148fe05f6b2f58fc0331230ec41b7 Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Wed, 14 Aug 2024 12:34:42 +0100 Subject: delete events TBD --- events - TBD?.tf | 52 ---------------------------------------------------- 1 file changed, 52 deletions(-) delete mode 100644 events - TBD?.tf diff --git a/events - TBD?.tf b/events - TBD?.tf deleted file mode 100644 index 25fb35b..0000000 --- a/events - TBD?.tf +++ /dev/null @@ -1,52 +0,0 @@ -resource "aws_cloudwatch_event_rule" "lambda_trigger" { - name = "lambda-scheduled-trigger" - description = "Schedule to trigger the Lambda function" - schedule_expression = "rate(30 minutes)" - -# event_pattern = jsonencode({ -# detail-type = [ -# "AWS Console Sign In via CloudTrail" -# ] -# }) -} - - -resource "aws_cloudwatch_event_target" "lambda" { - rule = aws_cloudwatch_event_rule.lambda_trigger.name - target_id = "TargetFunctionV1" - arn = aws_lambda_function.my_lambda_function.arn -} - - - -resource "aws_lambda_permission" "allow_eventbridge" { - statement_id = "AllowExecutionFromEventBridge" - action = "lambda:InvokeFunction" - function_name = aws_lambda_function.my_lambda_function.function_name - principal = "events.amazonaws.com" - source_arn = aws_cloudwatch_event_rule.lambda_trigger.arn -} - - -# below is step function 1 -resource "aws_lambda_permission" "allow_s3_ingestion" { - statement_id = "AllowS3InvokeLambdaTransform" - action = "lambda:InvokeFunction" - function_name = aws_lambda_function.lambda_transform.function_name - principal = "s3.amazonaws.com" - source_arn = aws_s3_bucket.extract.arn -} - - -resource "aws_s3_bucket_notification" "extract_bucket_notification" { - bucket = aws_s3_bucket.extract.id - - lambda_function { - events = ["s3:ObjectCreated:*"] - lambda_function_arn = aws_lambda_function.lambda_transform.arn - } - - depends_on = [aws_lambda_permission.allow_s3_ingestion] -} - -# need to duplicate and replace "2" with "3" \ No newline at end of file -- cgit v1.2.3 From 9966fcff43cc5e748bc3eb406c270d63fa51ca61 Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Wed, 14 Aug 2024 12:35:25 +0100 Subject: change the s3-key --- terraform/lambda.tf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index bcbf394..a5da972 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -8,7 +8,7 @@ data "archive_file" "extract_lambda_zip" { resource "aws_lambda_function" "extract_lambda" { function_name = "${var.extract_lambda_name}" s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = "extract_lambda/extract_function.zip" + s3_key = "extract-lambda/extract_function.zip" role = aws_iam_role.multi_service_role.arn #<< lambda role placehodler handler = "extract_lambda.lambda_handler" # << check that the function is called lambda handler runtime = "python3.11" @@ -37,7 +37,7 @@ data "archive_file" "transform_lambda_zip" { resource "aws_lambda_function" "transform_lambda" { function_name = "${var.transform_lambda_name}" s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = "transform_lambda/transform_function.zip" + s3_key = "transform-lambda/transform_function.zip" role = aws_iam_role.multi_service_role.arn # << lambda role placehodler handler = "transform_lambda.lambda_handler" # << check that the function is called lambda handler runtime = "python3.11" @@ -65,7 +65,7 @@ data "archive_file" "load_lambda_zip" { resource "aws_lambda_function" "load_lambda" { function_name = "${var.load_lambda_name}" s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = "load_lambda/load_function.zip" + s3_key = "load-lambda/load_function.zip" role = aws_iam_role.multi_service_role.arn # << lambda role placehodler handler = "load_lambda.lambda_handler" # << check that the function is called lambda handler runtime = "python3.11" -- cgit v1.2.3 From 6caff95389a6054f8fe1d1c9c23db9a251b6b355 Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Wed, 14 Aug 2024 12:35:59 +0100 Subject: change the name of bucket in backend bucket --- terraform/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/main.tf b/terraform/main.tf index 3ca9a3d..2a048b9 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -6,7 +6,7 @@ terraform { } } backend "s3" { - bucket = "bentley-secrets" + bucket = "bentley-project-secrets" key = "bentley-project/terraform.tfstate" region = "eu-west-2" } -- cgit v1.2.3 From 4d52771ceee6841febc6179e0b9608fb35a792d9 Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Wed, 14 Aug 2024 12:36:22 +0100 Subject: change the attachment/policies, individual --- terraform/iam.tf | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/terraform/iam.tf b/terraform/iam.tf index f34d58a..cf4902a 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -117,14 +117,29 @@ resource "aws_iam_policy" "s3_write_policy" { } # S3 ATTACH POLICY -resource "aws_iam_role_policy_attachment" "lambda_s3_policy_attachment" { - for_each = toset([ - aws_iam_policy.s3_write_policy.arn, - aws_iam_policy.lambda_execution_policy.arn, - aws_iam_policy.cw_policy.arn - ]) - role = aws_iam_role.multi_service_role.name - policy_arn = each.value +# resource "aws_iam_role_policy_attachment" "lambda_s3_policy_attachment" { +# for_each = toset([ +# aws_iam_policy.s3_write_policy.arn, +# aws_iam_policy.lambda_execution_policy.arn, +# aws_iam_policy.cw_policy.arn +# ]) +# role = aws_iam_role.multi_service_role.name +# policy_arn = each.value +# } + +resource "aws_iam_role_policy_attachment" "s3_attachment" { + role = aws_iam_role.multi_service_role.name + policy_arn = aws_iam_policy.s3_write_policy.arn +} + +resource "aws_iam_role_policy_attachment" "lambda_attachment" { + role = aws_iam_role.multi_service_role.name + policy_arn = aws_iam_policy.lambda_execution_policy.arn +} + +resource "aws_iam_role_policy_attachment" "cw_attachment" { + role = aws_iam_role.multi_service_role.name + policy_arn = aws_iam_policy.cw_policy.arn } ################ -- cgit v1.2.3 From f2c0429698edafccb0846a58e4020bf419e7e824 Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Wed, 14 Aug 2024 12:37:31 +0100 Subject: style change --- terraform/events.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/events.tf b/terraform/events.tf index 7a6b0ad..6744085 100644 --- a/terraform/events.tf +++ b/terraform/events.tf @@ -4,7 +4,7 @@ resource "aws_cloudwatch_event_rule" "lambda_trigger" { schedule_expression = "rate(30 minutes)" # event_pattern = jsonencode({ -# detail-type = [ +# detail-type = # "AWS Console Sign In via CloudTrail" # ] # }) -- cgit v1.2.3 From 46673b671bef834dc2e043e7845e8a5b8fee9d34 Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Wed, 14 Aug 2024 12:38:08 +0100 Subject: update gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5861f48..239c7e0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ *.tfstate.* *.tfvars *.tfvars.json -.terraform.tfstate.lock.info \ No newline at end of file +.terraform.tfstate.lock.info +*.zip \ No newline at end of file -- cgit v1.2.3 From 101e1e24cb38b6a45661b723881e2b2d6dd2fb07 Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Wed, 14 Aug 2024 14:35:05 +0100 Subject: wip: terraform debugging --- .gitignore | 5 ++++- src/load_lambda.py | 2 ++ src/transform_lambda.py | 2 ++ terraform/events.tf | 18 ++++++++++-------- terraform/s3.tf | 34 +++++++++++++++++----------------- 5 files changed, 35 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index 239c7e0..d759665 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,7 @@ *.tfvars *.tfvars.json .terraform.tfstate.lock.info -*.zip \ No newline at end of file +*.zip +.terraform/ +.terraform* +log* \ No newline at end of file diff --git a/src/load_lambda.py b/src/load_lambda.py index e69de29..6ee681f 100644 --- a/src/load_lambda.py +++ b/src/load_lambda.py @@ -0,0 +1,2 @@ +def lambda_handler(): + pass \ No newline at end of file diff --git a/src/transform_lambda.py b/src/transform_lambda.py index e69de29..6ee681f 100644 --- a/src/transform_lambda.py +++ b/src/transform_lambda.py @@ -0,0 +1,2 @@ +def lambda_handler(): + pass \ No newline at end of file diff --git a/terraform/events.tf b/terraform/events.tf index 6744085..9fd89e4 100644 --- a/terraform/events.tf +++ b/terraform/events.tf @@ -1,7 +1,17 @@ +resource "aws_cloudwatch_event_target" "extract_lambda_cw_event" { + rule = aws_cloudwatch_event_rule.lambda_trigger.name + target_id = "TargetFunctionV1" + arn = aws_lambda_function.extract_lambda.arn #replaced lambda name placeholder + force_destroy = true +} + resource "aws_cloudwatch_event_rule" "lambda_trigger" { name = "lambda-scheduled-trigger" description = "Schedule to trigger the Lambda function" schedule_expression = "rate(30 minutes)" + force_destroy = true + # depends_on = [ + # aws_cloudwatch_event_target.extract_lambda_cw_event] # event_pattern = jsonencode({ # detail-type = @@ -10,14 +20,6 @@ resource "aws_cloudwatch_event_rule" "lambda_trigger" { # }) } - -resource "aws_cloudwatch_event_target" "extract_lambda_cw_event" { - rule = aws_cloudwatch_event_rule.lambda_trigger.name - target_id = "TargetFunctionV1" - arn = aws_lambda_function.extract_lambda.arn #replaced lambda name placeholder -} - - resource "aws_lambda_permission" "allow_eventbridge" { statement_id = "AllowExecutionFromEventBridge" action = "lambda:InvokeFunction" diff --git a/terraform/s3.tf b/terraform/s3.tf index 8ab5622..4c06b8e 100644 --- a/terraform/s3.tf +++ b/terraform/s3.tf @@ -32,20 +32,20 @@ resource "aws_s3_bucket" "lambda_code_bucket" { bucket_prefix = "${var.s3_code_bucket_name}-" } -resource "aws_s3_object" "extract_lambda_code" { - bucket = aws_s3_bucket.lambda_code_bucket.bucket - key = "${var.extract_lambda_name}/extract_function.zip" - source = "${path.module}/../extract_function.zip" -} # << can't figure out how this is being used but we seem to need it - -resource "aws_s3_object" "transform_lambda_code" { - bucket = aws_s3_bucket.lambda_code_bucket.bucket - key = "${var.transform_lambda_name}/transform_function.zip" - source = "${path.module}/../transform_function.zip" -} # << can't figure out how this is being used but we seem to need it - -resource "aws_s3_object" "load_lambda_code" { - bucket = aws_s3_bucket.lambda_code_bucket.bucket - key = "${var.load_lambda_name}/load_function.zip" - source = "${path.module}/../load_function.zip" -} \ No newline at end of file +# resource "aws_s3_object" "extract_lambda_code" { +# bucket = aws_s3_bucket.lambda_code_bucket.bucket +# key = "${var.extract_lambda_name}/extract_function.zip" +# source = "${path.module}/../extract_function.zip" +# } # << can't figure out how this is being used but we seem to need it + +# resource "aws_s3_object" "transform_lambda_code" { +# bucket = aws_s3_bucket.lambda_code_bucket.bucket +# key = "${var.transform_lambda_name}/transform_function.zip" +# source = "${path.module}/../transform_function.zip" +# } # << can't figure out how this is being used but we seem to need it + +# resource "aws_s3_object" "load_lambda_code" { +# bucket = aws_s3_bucket.lambda_code_bucket.bucket +# key = "${var.load_lambda_name}/load_function.zip" +# source = "${path.module}/../load_function.zip" +# } \ No newline at end of file -- cgit v1.2.3 From 45e025ac0c4ae8c721cb0b875fd0abd67cc2bc07 Mon Sep 17 00:00:00 2001 From: T-Aji Date: Wed, 14 Aug 2024 15:53:11 +0100 Subject: test: passing test for function connect_to_database --- src/extract_lambda.py | 40 +++++++++++++++++++++++++--------------- tests/test_extract_lambda.py | 40 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 62 insertions(+), 18 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index dc70590..6e94bba 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -1,6 +1,5 @@ from pg8000.native import Connection, DatabaseError, InterfaceError -from dotenv import load_dotenv -import os +from dotenv import dotenv_values import boto3 import csv from botocore.exceptions import ClientError @@ -9,16 +8,15 @@ import json logger = logging.getLogger() logger.setLevel(logging.INFO) -load_dotenv() - - -database = os.getenv('database') -user = os.getenv('user') -password = os.getenv('password') -host = os.getenv('host') -port = os.getenv('port') +class DBConnectionException(Exception): + """Wraps pg8000.native Error or DatabaseError.""" + def __init__(self, e): + """Initialise with provided error message.""" + self.message = str(e) + super().__init__(self.message) + def lambda_handler(event, context): """This lambda function connects to the Totesys database, lists the contents of the ingestion bucket, and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them @@ -53,8 +51,19 @@ def lambda_handler(event, context): if db: db.close() -def connect_to_database(): +def get_config(path: str = ".env") -> dict: + return dotenv_values(path) + + +def connect_to_database() -> Connection: try: + config = get_config() + host = config["host"] + port = config["port"] + user = config["user"] + password = config["password"] + database = config["database"] + return Connection( database=database, user=user, @@ -62,12 +71,13 @@ def connect_to_database(): host=host, port=port ) - except DatabaseError as e: - logger.error(f'Database error: {e}') - raise + # except DatabaseError as e: + # logger.error(f'Database error: {e}') + # raise except InterfaceError as i: logger.error(f'Interface error: {i}') - raise + raise DBConnectionException("Failed to connect to database") + def list_existing_s3_files(bucket_name='extract_bucket', client=boto3.client('s3')): diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index 472e93a..18c49fc 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -1,10 +1,24 @@ import pytest import boto3 from moto import mock_aws -from src.extract_lambda import list_existing_s3_files #process_and_upload_tables +from unittest.mock import patch +from unittest import TestCase +from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException #process_and_upload_tables import os import logging +@pytest.fixture(scope='class') +def mock_config(): + env_vars = { + "host": "abc", + "port": "5432", + "user": "def", + "password": "password", + "database": "db", + } + with patch("src.extract_lambda.get_config", return_value=env_vars) as mock_config: + yield mock_config + @pytest.fixture(scope='class') def aws_credentials(): @@ -19,7 +33,7 @@ def s3_client(aws_credentials): with mock_aws(): yield boto3.client('s3') -class TestListExistings3Files(): +class TestListExistings3Files: def test_error_if_no_bucket(self, s3_client, caplog): logger = logging.getLogger() @@ -46,4 +60,24 @@ class TestListExistings3Files(): def test_retrieves_file_content(self, s3_client, caplog): result = list_existing_s3_files(client=s3_client) - assert list(result.values()) == ['This is a test file.'] \ No newline at end of file + assert list(result.values()) == ['This is a test file.'] + +class TestConnectToDatabase: + def test_connect_to_database(mock_conn, mock_config): + with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: + connect_to_database() + mock_conn.assert_called_with( + host="abc", user="def", port="5432", password="password", database="db" + ) + + def test_database_error(self, mock_config): + with pytest.raises(DBConnectionException): + connect_to_database() + + def test_logs_interface_error(self, caplog): + logger = logging.getLogger() + logger.info('Testing now.') + caplog.set_level(logging.ERROR) + with pytest.raises(DBConnectionException): + connect_to_database() + assert 'Interface error' in caplog.text \ No newline at end of file -- cgit v1.2.3 From e95e9e59d2a36a2d700bfa199f6b5ece3c49233d Mon Sep 17 00:00:00 2001 From: Ellie Date: Wed, 14 Aug 2024 16:35:32 +0100 Subject: chage rate 30 seconds --- terraform/events.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/events.tf b/terraform/events.tf index 9fd89e4..ef01feb 100644 --- a/terraform/events.tf +++ b/terraform/events.tf @@ -8,7 +8,7 @@ resource "aws_cloudwatch_event_target" "extract_lambda_cw_event" { resource "aws_cloudwatch_event_rule" "lambda_trigger" { name = "lambda-scheduled-trigger" description = "Schedule to trigger the Lambda function" - schedule_expression = "rate(30 minutes)" + schedule_expression = "rate(30 seconds)" force_destroy = true # depends_on = [ # aws_cloudwatch_event_target.extract_lambda_cw_event] -- cgit v1.2.3 From 911b2a4ba08e59f46a53b5252a044a5853796f78 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 14 Aug 2024 16:38:36 +0100 Subject: test(tf): attempt to solve ResourceConflictException --- terraform/events.tf | 35 ++++++++--------- terraform/iam.tf | 60 +++++++++++++++++++++-------- terraform/lambda.tf | 109 +++++++++++++++++++++++++--------------------------- terraform/main.tf | 26 ++++++------- terraform/s3.tf | 43 ++------------------- terraform/vars.tf | 26 ++++++------- 6 files changed, 139 insertions(+), 160 deletions(-) diff --git a/terraform/events.tf b/terraform/events.tf index 9fd89e4..d2e2eb5 100644 --- a/terraform/events.tf +++ b/terraform/events.tf @@ -1,23 +1,18 @@ -resource "aws_cloudwatch_event_target" "extract_lambda_cw_event" { - rule = aws_cloudwatch_event_rule.lambda_trigger.name - target_id = "TargetFunctionV1" - arn = aws_lambda_function.extract_lambda.arn #replaced lambda name placeholder - force_destroy = true -} - resource "aws_cloudwatch_event_rule" "lambda_trigger" { name = "lambda-scheduled-trigger" description = "Schedule to trigger the Lambda function" schedule_expression = "rate(30 minutes)" - force_destroy = true - # depends_on = [ - # aws_cloudwatch_event_target.extract_lambda_cw_event] - -# event_pattern = jsonencode({ -# detail-type = -# "AWS Console Sign In via CloudTrail" -# ] -# }) + + lifecycle { + create_before_destroy = true + } +} + +resource "aws_cloudwatch_event_target" "extract_lambda_cw_event" { + rule = aws_cloudwatch_event_rule.lambda_trigger.name + target_id = "TargetFunctionV1" + arn = aws_lambda_function.extract_lambda.arn #replaced lambda name placeholder + depends_on = [aws_lambda_permission.allow_eventbridge] } resource "aws_lambda_permission" "allow_eventbridge" { @@ -25,7 +20,7 @@ resource "aws_lambda_permission" "allow_eventbridge" { action = "lambda:InvokeFunction" function_name = aws_lambda_function.extract_lambda.function_name #replaced lambda name placeholder principal = "events.amazonaws.com" - source_arn = aws_cloudwatch_event_rule.lambda_trigger.arn + source_arn = aws_cloudwatch_event_rule.lambda_trigger.arn } @@ -43,7 +38,7 @@ resource "aws_s3_bucket_notification" "extract_bucket_notification" { bucket = aws_s3_bucket.extract_bucket.id #replaced bucket name placeholder lambda_function { - events = ["s3:ObjectCreated:*"] + events = ["s3:ObjectCreated:*"] lambda_function_arn = aws_lambda_function.transform_lambda.arn #replaced lambda name placeholder } @@ -65,9 +60,9 @@ resource "aws_s3_bucket_notification" "transform_bucket_notification" { bucket = aws_s3_bucket.transform_bucket.id #replaced bucket name placeholder lambda_function { - events = ["s3:ObjectCreated:*"] + events = ["s3:ObjectCreated:*"] lambda_function_arn = aws_lambda_function.transform_lambda.arn #replaced lambda name placeholder } depends_on = [aws_lambda_permission.allow_s3_transform_bucket] -} \ No newline at end of file +} diff --git a/terraform/iam.tf b/terraform/iam.tf index cf4902a..6c6b4fc 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -16,9 +16,7 @@ resource "aws_iam_role" "multi_service_role" { Principal = { Service = [ "lambda.amazonaws.com", - "cloudwatch.amazonaws.com", - "events.amazonaws.com", - "s3.amazonaws.com" + "scheduler.amazonaws.com" ] } } @@ -57,22 +55,22 @@ data "aws_iam_policy_document" "s3_data_policy_doc" { ######################################################################## resource "aws_iam_policy" "lambda_execution_policy" { - name = "lambda_execution_policy" - path = "/" + name = "lambda_execution_policy" + path = "/" description = "IAM policy for Lambda execution" policy = jsonencode({ Version = "2012-10-17" Statement = [ - { + { Effect = "Allow" Action = [ "lambda:InvokeFunction", "lambda:GetFunction" ] Resource = "*" - } - ] + } + ] } ) } @@ -87,7 +85,7 @@ data "aws_iam_policy_document" "cw_document" { actions = ["logs:CreateLogGroup"] resources = [ "arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:*" - ] + ] } statement { @@ -95,15 +93,15 @@ data "aws_iam_policy_document" "cw_document" { "logs:CreateLogStream", "logs:CreateLogGroup", "logs:PutLogEvents" - ] - resources = [ - "arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:/aws/lambda/*" - ] + ] + resources = [ + "arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:/aws/lambda/*" + ] } } resource "aws_iam_policy" "cw_policy" { - name = "cw_policy" + name = "cw_policy" policy = data.aws_iam_policy_document.cw_document.json } @@ -128,20 +126,48 @@ resource "aws_iam_policy" "s3_write_policy" { # } resource "aws_iam_role_policy_attachment" "s3_attachment" { - role = aws_iam_role.multi_service_role.name + role = aws_iam_role.multi_service_role.name policy_arn = aws_iam_policy.s3_write_policy.arn } resource "aws_iam_role_policy_attachment" "lambda_attachment" { - role = aws_iam_role.multi_service_role.name + role = aws_iam_role.multi_service_role.name policy_arn = aws_iam_policy.lambda_execution_policy.arn } resource "aws_iam_role_policy_attachment" "cw_attachment" { - role = aws_iam_role.multi_service_role.name + role = aws_iam_role.multi_service_role.name policy_arn = aws_iam_policy.cw_policy.arn } ################ # RDS POLICIES # ################ + +################### +# EVENTS POLICIES # +################### + +data "aws_iam_policy_document" "cloudwatch_events_policy" { + statement { + actions = [ + "events:PutRule", + "events:PutTargets", + "events:RemoveTargets", + "events:DeleteRule", + "events:PutEvents" + ] + resources = ["*"] + effect = "Allow" + } +} + +resource "aws_iam_policy" "cloudwatch_events_policy" { + name = "cloudwatch_events_policy" + policy = data.aws_iam_policy_document.cloudwatch_events_policy.json +} + +resource "aws_iam_role_policy_attachment" "cloudwatch_events_attachment" { + role = aws_iam_role.multi_service_role.name + policy_arn = aws_iam_policy.cloudwatch_events_policy.arn +} diff --git a/terraform/lambda.tf b/terraform/lambda.tf index a5da972..fb0a666 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -1,73 +1,68 @@ -### EXTRACT LAMBDA SET UP -data "archive_file" "extract_lambda_zip" { - type = "zip" - source_file = "${path.module}/../src/extract_lambda.py" - output_path = "${path.module}/../extract_function.zip" +# Extract Lambda Function +resource "aws_s3_object" "extract_lambda_code" { + bucket = aws_s3_bucket.lambda_code_bucket.bucket + key = "${var.extract_lambda_name}/extract_function.zip" + source = "${path.module}/../extract_function.zip" + etag = filemd5("${path.module}/../extract_function.zip") } resource "aws_lambda_function" "extract_lambda" { - function_name = "${var.extract_lambda_name}" - s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = "extract-lambda/extract_function.zip" - role = aws_iam_role.multi_service_role.arn #<< lambda role placehodler - handler = "extract_lambda.lambda_handler" # << check that the function is called lambda handler - runtime = "python3.11" - environment { - variables = { - output = aws_s3_bucket.extract_bucket.bucket - } - } -} + function_name = var.extract_lambda_name + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.extract_lambda_code.key + role = aws_iam_role.multi_service_role.arn + handler = "extract_lambda.extract" + runtime = "python3.11" -resource "aws_lambda_permission" "allow_to_write_to_s3_extract_bucket" { - action = "lambda:InvokeFunction" - function_name = aws_lambda_function.extract_lambda.function_name - principal = "s3.amazonaws.com" - source_arn = aws_s3_bucket.extract_bucket.arn -} + lifecycle { + create_before_destroy = true + } + depends_on = [aws_s3_object.extract_lambda_code] +} -### TRANSFORM LAMBDA SET UP -data "archive_file" "transform_lambda_zip" { - type = "zip" - source_file = "${path.module}/../src/transform_lambda.py" - output_path = "${path.module}/../transform_function.zip" +# Transform Lambda Function +resource "aws_s3_object" "transform_lambda_code" { + bucket = aws_s3_bucket.lambda_code_bucket.bucket + key = "${var.transform_lambda_name}/transform_function.zip" + source = "${path.module}/../transform_function.zip" + etag = filemd5("${path.module}/../transform_function.zip") } resource "aws_lambda_function" "transform_lambda" { - function_name = "${var.transform_lambda_name}" - s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = "transform-lambda/transform_function.zip" - role = aws_iam_role.multi_service_role.arn # << lambda role placehodler - handler = "transform_lambda.lambda_handler" # << check that the function is called lambda handler - runtime = "python3.11" - environment { - variables = { - output = aws_s3_bucket.transform_bucket.bucket - } - } -} + function_name = var.transform_lambda_name + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.transform_lambda_code.key + role = aws_iam_role.multi_service_role.arn + handler = "transform_lambda.transform" + runtime = "python3.11" -resource "aws_lambda_permission" "allow_to_write_to_s3_transform_bucket" { - action = "lambda:InvokeFunction" - function_name = aws_lambda_function.transform_lambda.function_name - principal = "s3.amazonaws.com" - source_arn = aws_s3_bucket.transform_bucket.arn + lifecycle { + create_before_destroy = true + } + + depends_on = [aws_s3_object.transform_lambda_code] } -### LOAD LAMBDA SET UP -data "archive_file" "load_lambda_zip" { - type = "zip" - source_file = "${path.module}/../src/load_lambda.py" - output_path = "${path.module}/../load_function.zip" +# Load Lambda Function +resource "aws_s3_object" "load_lambda_code" { + bucket = aws_s3_bucket.lambda_code_bucket.bucket + key = "${var.load_lambda_name}/load_function.zip" + source = "${path.module}/../load_function.zip" + etag = filemd5("${path.module}/../load_function.zip") } resource "aws_lambda_function" "load_lambda" { - function_name = "${var.load_lambda_name}" - s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = "load-lambda/load_function.zip" - role = aws_iam_role.multi_service_role.arn # << lambda role placehodler - handler = "load_lambda.lambda_handler" # << check that the function is called lambda handler - runtime = "python3.11" -} + function_name = var.load_lambda_name + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.load_lambda_code.key + role = aws_iam_role.multi_service_role.arn + handler = "load_lambda.load" + runtime = "python3.11" + lifecycle { + create_before_destroy = true + } + + depends_on = [aws_s3_object.load_lambda_code] +} diff --git a/terraform/main.tf b/terraform/main.tf index 2a048b9..3b06701 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -1,26 +1,26 @@ terraform { required_providers { aws = { - source = "hashicorp/aws" - version = "~>5.0" + source = "hashicorp/aws" + version = "~>5.0" } } backend "s3" { bucket = "bentley-project-secrets" - key = "bentley-project/terraform.tfstate" + key = "bentley-project/terraform.tfstate" region = "eu-west-2" } } provider "aws" { - region = "eu-west-2" - default_tags { - tags = { - ProjectName = "Terrific-Totes" - Team = "Team-Bentley" - Environment = "Dev" - GitHubRepo = "de-project-bentley" - ManagedBy = "Terraform" - } + region = "eu-west-2" + default_tags { + tags = { + ProjectName = "Terrific-Totes" + Team = "Team-Bentley" + Environment = "Dev" + GitHubRepo = "de-project-bentley" + ManagedBy = "Terraform" } -} \ No newline at end of file + } +} diff --git a/terraform/s3.tf b/terraform/s3.tf index 4c06b8e..d5cdee3 100644 --- a/terraform/s3.tf +++ b/terraform/s3.tf @@ -1,51 +1,14 @@ ### EXTRACT BUCKET SET-UP resource "aws_s3_bucket" "extract_bucket" { - bucket_prefix = "${var.s3_extract_bucket_name}-" + bucket_prefix = "${var.s3_extract_bucket_name}-" } -# resource "aws_s3_bucket_notification" "extract_bucket_notification" { -# bucket = aws_s3_bucket.extract_bucket.id -# lambda_function { -# lambda_function_arn = aws_lambda_function.extract_lambda.arn -# events = ["s3:ObjectCreated:*"] -# } -# depends_on = [aws_lambda_permission.allow_to_write_to_s3_extract_bucket] -# } # << is this the correct permission dependency? - ### TRANSFORM BUCKET SET-UP resource "aws_s3_bucket" "transform_bucket" { - bucket_prefix = "${var.s3_transform_bucket_name}-" + bucket_prefix = "${var.s3_transform_bucket_name}-" } -# resource "aws_s3_bucket_notification" "transform_bucket_notification" { -# bucket = aws_s3_bucket.transform_bucket.id -# lambda_function { -# lambda_function_arn = aws_lambda_function.transform_lambda.arn -# events = ["s3:ObjectCreated:*"] -# } -# depends_on = [aws_lambda_permission.allow_to_write_to_s3_transform_bucket] -# } # << is this the correct permission dependency? - - ### LAMBDA BUCKET resource "aws_s3_bucket" "lambda_code_bucket" { - bucket_prefix = "${var.s3_code_bucket_name}-" + bucket_prefix = "${var.s3_code_bucket_name}-" } - -# resource "aws_s3_object" "extract_lambda_code" { -# bucket = aws_s3_bucket.lambda_code_bucket.bucket -# key = "${var.extract_lambda_name}/extract_function.zip" -# source = "${path.module}/../extract_function.zip" -# } # << can't figure out how this is being used but we seem to need it - -# resource "aws_s3_object" "transform_lambda_code" { -# bucket = aws_s3_bucket.lambda_code_bucket.bucket -# key = "${var.transform_lambda_name}/transform_function.zip" -# source = "${path.module}/../transform_function.zip" -# } # << can't figure out how this is being used but we seem to need it - -# resource "aws_s3_object" "load_lambda_code" { -# bucket = aws_s3_bucket.lambda_code_bucket.bucket -# key = "${var.load_lambda_name}/load_function.zip" -# source = "${path.module}/../load_function.zip" -# } \ No newline at end of file diff --git a/terraform/vars.tf b/terraform/vars.tf index cc9348a..84824ce 100644 --- a/terraform/vars.tf +++ b/terraform/vars.tf @@ -1,33 +1,33 @@ variable "s3_extract_bucket_name" { - type = string - default = "extract-bucket" + type = string + default = "extract-bucket" } variable "s3_transform_bucket_name" { - type = string - default = "transform-bucket" + type = string + default = "transform-bucket" } variable "s3_code_bucket_name" { - type = string - default = "lambda-bucket" + type = string + default = "lambda-bucket" } variable "extract_lambda_name" { - type = string - default = "extract-lambda" + type = string + default = "extract-lambda" } variable "transform_lambda_name" { - type = string - default = "transform-lambda" + type = string + default = "transform-lambda" } variable "load_lambda_name" { - type = string - default = "load-lambda" + type = string + default = "load-lambda" } data "aws_caller_identity" "current" {} -data "aws_region" "current" {} \ No newline at end of file +data "aws_region" "current" {} -- cgit v1.2.3 From 43b7bca661a4c42acbfd6c27ff26fee9c25e911e Mon Sep 17 00:00:00 2001 From: Ellie Date: Wed, 14 Aug 2024 16:39:12 +0100 Subject: amend s3 handler to extract --- terraform/lambda.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index a5da972..c190ea8 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -10,7 +10,7 @@ resource "aws_lambda_function" "extract_lambda" { s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket s3_key = "extract-lambda/extract_function.zip" role = aws_iam_role.multi_service_role.arn #<< lambda role placehodler - handler = "extract_lambda.lambda_handler" # << check that the function is called lambda handler + handler = "extract_lambda.extract" # << check that the function is called lambda handler runtime = "python3.11" environment { variables = { -- cgit v1.2.3 From 51eb46bbeb8fe5cb7b8750c37c776e8c9b4ae7f6 Mon Sep 17 00:00:00 2001 From: Ellie Date: Wed, 14 Aug 2024 16:41:02 +0100 Subject: uncomment s3 objects --- terraform/s3.tf | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/terraform/s3.tf b/terraform/s3.tf index 4c06b8e..8ab5622 100644 --- a/terraform/s3.tf +++ b/terraform/s3.tf @@ -32,20 +32,20 @@ resource "aws_s3_bucket" "lambda_code_bucket" { bucket_prefix = "${var.s3_code_bucket_name}-" } -# resource "aws_s3_object" "extract_lambda_code" { -# bucket = aws_s3_bucket.lambda_code_bucket.bucket -# key = "${var.extract_lambda_name}/extract_function.zip" -# source = "${path.module}/../extract_function.zip" -# } # << can't figure out how this is being used but we seem to need it - -# resource "aws_s3_object" "transform_lambda_code" { -# bucket = aws_s3_bucket.lambda_code_bucket.bucket -# key = "${var.transform_lambda_name}/transform_function.zip" -# source = "${path.module}/../transform_function.zip" -# } # << can't figure out how this is being used but we seem to need it - -# resource "aws_s3_object" "load_lambda_code" { -# bucket = aws_s3_bucket.lambda_code_bucket.bucket -# key = "${var.load_lambda_name}/load_function.zip" -# source = "${path.module}/../load_function.zip" -# } \ No newline at end of file +resource "aws_s3_object" "extract_lambda_code" { + bucket = aws_s3_bucket.lambda_code_bucket.bucket + key = "${var.extract_lambda_name}/extract_function.zip" + source = "${path.module}/../extract_function.zip" +} # << can't figure out how this is being used but we seem to need it + +resource "aws_s3_object" "transform_lambda_code" { + bucket = aws_s3_bucket.lambda_code_bucket.bucket + key = "${var.transform_lambda_name}/transform_function.zip" + source = "${path.module}/../transform_function.zip" +} # << can't figure out how this is being used but we seem to need it + +resource "aws_s3_object" "load_lambda_code" { + bucket = aws_s3_bucket.lambda_code_bucket.bucket + key = "${var.load_lambda_name}/load_function.zip" + source = "${path.module}/../load_function.zip" +} \ No newline at end of file -- cgit v1.2.3 From 848a86b7f3b9c5ce16cd774d19e3fa62ca8ffc68 Mon Sep 17 00:00:00 2001 From: T-Aji Date: Wed, 14 Aug 2024 18:14:01 +0100 Subject: test: mid-through test for process_and_upload_tables --- src/extract_lambda.py | 16 +++++++--------- tests/test_extract_lambda.py | 35 ++++++++++++++++++++++++++++++++--- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 6e94bba..a70ecdd 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -5,6 +5,7 @@ import csv from botocore.exceptions import ClientError import logging import json +from datetime import datetime logger = logging.getLogger() logger.setLevel(logging.INFO) @@ -16,7 +17,7 @@ class DBConnectionException(Exception): """Initialise with provided error message.""" self.message = str(e) super().__init__(self.message) - + def lambda_handler(event, context): """This lambda function connects to the Totesys database, lists the contents of the ingestion bucket, and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them @@ -71,9 +72,6 @@ def connect_to_database() -> Connection: host=host, port=port ) - # except DatabaseError as e: - # logger.error(f'Database error: {e}') - # raise except InterfaceError as i: logger.error(f'Interface error: {i}') raise DBConnectionException("Failed to connect to database") @@ -110,14 +108,14 @@ def list_existing_s3_files(bucket_name='extract_bucket', client=boto3.client('s3 -def process_and_upload_tables(db, existing_files): +def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): """Creates a list of the tables from a database query and then selects everything from each table in individual queries it then writes each table to CSV files and compares with the item - in the existing_files dictionary with the same name. If it finds sny changes + in the existing_files dictionary with the same name. If it finds any changes to files, or new tables/files it uploads them to the s3 bucket """ - client = boto3.client('s3') + tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';") for table in tables: @@ -132,13 +130,13 @@ def process_and_upload_tables(db, existing_files): writer.writerow(column_names) writer.writerows(rows) - s3_key = f"{table_name}/latest.csv" + s3_key = f"{table_name}/{datetime.today().year}/{datetime.today().month}/{datetime.today().day}/{table_name}_{datetime.now().strftime('%H:%M:%S')}.csv" new_csv_content = open(csv_file_path, "r").read() if s3_key not in existing_files or existing_files[s3_key] != new_csv_content: try: - client.upload_file(csv_file_path, ingestion_bucket, s3_key) + client.upload_file(csv_file_path, 'extract_bucket', s3_key) logger.info(f"Uploaded {s3_key} to S3.") except ClientError as e: logger.error(f'Error uploading to S3: {e}') \ No newline at end of file diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index 18c49fc..74d7e2c 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -3,7 +3,7 @@ import boto3 from moto import mock_aws from unittest.mock import patch from unittest import TestCase -from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException #process_and_upload_tables +from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables import os import logging @@ -33,7 +33,7 @@ def s3_client(aws_credentials): with mock_aws(): yield boto3.client('s3') -class TestListExistings3Files: +class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): logger = logging.getLogger() @@ -80,4 +80,33 @@ class TestConnectToDatabase: caplog.set_level(logging.ERROR) with pytest.raises(DBConnectionException): connect_to_database() - assert 'Interface error' in caplog.text \ No newline at end of file + assert 'Interface error' in caplog.text + +class TestProcessAndUploadTables: + def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog, mocker): + logger = logging.getLogger() + logger.info('Testing now.') + caplog.set_level(logging.ERROR) + + with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: + mock_db = connect_to_database() + # need to add a table + s3_key = 'dummy/2024/8/14/dummy_16:46:30.txt' + mock_existing_files = mocker.Mock(return_value={s3_key: 'This is a test file.' }) + s3_client.create_bucket(Bucket='extract_bucket', + CreateBucketConfiguration={ + 'LocationConstraint': 'eu-west-2' + }) + s3_client.upload_file('tests/dummy.txt', 'extract_bucket', s3_key) + process_and_upload_tables(mock_db, mock_existing_files, client=s3_client) + + assert 'Error uploading to S3' in caplog.text + +#@pytest.mark.describe("Helpers") +# @pytest.mark.it("Query processor returns correctly formatted dict") +# def test_process_query(): +# with patch("src.api.helpers.get_db_connection") as mock_conn: +# mock_conn().run.side_effect = db_data +# mock_conn().columns = sample_headers +# result = process_query("test query") +# assert result == sample_result \ No newline at end of file -- cgit v1.2.3 From 5cdcbd64e9f4dba5f3ed8e8eb9f6e91e1adde0ba Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 14 Aug 2024 22:51:32 +0100 Subject: chore(gitignore): ignore .DS_Store --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index d759665..d1df545 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ *.zip .terraform/ .terraform* -log* \ No newline at end of file +log* +.DS_Store -- cgit v1.2.3 From 9ff947c167932bb9ff93f05c8adf2ffcd98b91cc Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 14 Aug 2024 22:52:33 +0100 Subject: infra(tf): simplify multi_service_role --- terraform/iam.tf | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/terraform/iam.tf b/terraform/iam.tf index cf4902a..20aeab3 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -16,9 +16,7 @@ resource "aws_iam_role" "multi_service_role" { Principal = { Service = [ "lambda.amazonaws.com", - "cloudwatch.amazonaws.com", - "events.amazonaws.com", - "s3.amazonaws.com" + "scheduler.amazonaws.com" ] } } @@ -57,22 +55,22 @@ data "aws_iam_policy_document" "s3_data_policy_doc" { ######################################################################## resource "aws_iam_policy" "lambda_execution_policy" { - name = "lambda_execution_policy" - path = "/" + name = "lambda_execution_policy" + path = "/" description = "IAM policy for Lambda execution" policy = jsonencode({ Version = "2012-10-17" Statement = [ - { + { Effect = "Allow" Action = [ "lambda:InvokeFunction", "lambda:GetFunction" ] Resource = "*" - } - ] + } + ] } ) } @@ -87,7 +85,7 @@ data "aws_iam_policy_document" "cw_document" { actions = ["logs:CreateLogGroup"] resources = [ "arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:*" - ] + ] } statement { @@ -95,15 +93,15 @@ data "aws_iam_policy_document" "cw_document" { "logs:CreateLogStream", "logs:CreateLogGroup", "logs:PutLogEvents" - ] - resources = [ - "arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:/aws/lambda/*" - ] + ] + resources = [ + "arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:/aws/lambda/*" + ] } } resource "aws_iam_policy" "cw_policy" { - name = "cw_policy" + name = "cw_policy" policy = data.aws_iam_policy_document.cw_document.json } @@ -128,17 +126,17 @@ resource "aws_iam_policy" "s3_write_policy" { # } resource "aws_iam_role_policy_attachment" "s3_attachment" { - role = aws_iam_role.multi_service_role.name + role = aws_iam_role.multi_service_role.name policy_arn = aws_iam_policy.s3_write_policy.arn } resource "aws_iam_role_policy_attachment" "lambda_attachment" { - role = aws_iam_role.multi_service_role.name + role = aws_iam_role.multi_service_role.name policy_arn = aws_iam_policy.lambda_execution_policy.arn } resource "aws_iam_role_policy_attachment" "cw_attachment" { - role = aws_iam_role.multi_service_role.name + role = aws_iam_role.multi_service_role.name policy_arn = aws_iam_policy.cw_policy.arn } -- cgit v1.2.3 From 3f89444cb09f0372b6a7621913944e372acd826c Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 14 Aug 2024 22:58:50 +0100 Subject: infra(tf): temporarily remove events policies --- terraform/iam.tf | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/terraform/iam.tf b/terraform/iam.tf index 6c6b4fc..20aeab3 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -143,31 +143,3 @@ resource "aws_iam_role_policy_attachment" "cw_attachment" { ################ # RDS POLICIES # ################ - -################### -# EVENTS POLICIES # -################### - -data "aws_iam_policy_document" "cloudwatch_events_policy" { - statement { - actions = [ - "events:PutRule", - "events:PutTargets", - "events:RemoveTargets", - "events:DeleteRule", - "events:PutEvents" - ] - resources = ["*"] - effect = "Allow" - } -} - -resource "aws_iam_policy" "cloudwatch_events_policy" { - name = "cloudwatch_events_policy" - policy = data.aws_iam_policy_document.cloudwatch_events_policy.json -} - -resource "aws_iam_role_policy_attachment" "cloudwatch_events_attachment" { - role = aws_iam_role.multi_service_role.name - policy_arn = aws_iam_policy.cloudwatch_events_policy.arn -} -- cgit v1.2.3 From 8b975ba92d10c8034bdb35fee19c42b6187ea24f Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 14 Aug 2024 23:03:14 +0100 Subject: chore(cleanup): remove commented out code --- terraform/iam.tf | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/terraform/iam.tf b/terraform/iam.tf index 20aeab3..195b07c 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -114,17 +114,6 @@ resource "aws_iam_policy" "s3_write_policy" { policy = data.aws_iam_policy_document.s3_data_policy_doc.json } -# S3 ATTACH POLICY -# resource "aws_iam_role_policy_attachment" "lambda_s3_policy_attachment" { -# for_each = toset([ -# aws_iam_policy.s3_write_policy.arn, -# aws_iam_policy.lambda_execution_policy.arn, -# aws_iam_policy.cw_policy.arn -# ]) -# role = aws_iam_role.multi_service_role.name -# policy_arn = each.value -# } - resource "aws_iam_role_policy_attachment" "s3_attachment" { role = aws_iam_role.multi_service_role.name policy_arn = aws_iam_policy.s3_write_policy.arn -- cgit v1.2.3 From 8ce75bcc1a89000fc0a9fea9b78ad31cd316ca06 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 14 Aug 2024 23:04:32 +0100 Subject: chore(cleanup): remove unused rds section --- terraform/iam.tf | 3 --- 1 file changed, 3 deletions(-) diff --git a/terraform/iam.tf b/terraform/iam.tf index 195b07c..acb98f4 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -129,6 +129,3 @@ resource "aws_iam_role_policy_attachment" "cw_attachment" { policy_arn = aws_iam_policy.cw_policy.arn } -################ -# RDS POLICIES # -################ -- cgit v1.2.3 From c5f840e96b8c1696a6ed506e9260c4f1c26db10d Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Wed, 14 Aug 2024 23:05:26 +0100 Subject: feat(tf): add cloudwatch events iam policies --- terraform/iam.tf | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/terraform/iam.tf b/terraform/iam.tf index acb98f4..0e5fa6d 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -129,3 +129,30 @@ resource "aws_iam_role_policy_attachment" "cw_attachment" { policy_arn = aws_iam_policy.cw_policy.arn } +################### +# EVENTS POLICIES # +################### + +data "aws_iam_policy_document" "cloudwatch_events_policy" { + statement { + actions = [ + "events:PutRule", + "events:PutTargets", + "events:RemoveTargets", + "events:DeleteRule", + "events:PutEvents" + ] + resources = ["*"] + effect = "Allow" + } +} + +resource "aws_iam_policy" "cloudwatch_events_policy" { + name = "cloudwatch_events_policy" + policy = data.aws_iam_policy_document.cloudwatch_events_policy.json +} + +resource "aws_iam_role_policy_attachment" "cloudwatch_events_attachment" { + role = aws_iam_role.multi_service_role.name + policy_arn = aws_iam_policy.cloudwatch_events_policy.arn +} -- cgit v1.2.3 From d78db8a0f99c778b1e4b401c7cb4a094a5f2b103 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 15 Aug 2024 00:24:41 +0100 Subject: fix(infra/tf): add unique id to permissions This was done to resolve issues with `ResourceConflictException` - add random_string resources for every lambda permission to add as suffixes - also add lifecycle options to force recreation whenever the random strings change --- terraform/events.tf | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/terraform/events.tf b/terraform/events.tf index d2e2eb5..263141f 100644 --- a/terraform/events.tf +++ b/terraform/events.tf @@ -1,11 +1,25 @@ +resource "random_string" "eventbridge_suffix" { + length = 8 + special = false + upper = false +} + +resource "random_string" "s3_ingestion_suffix" { + length = 8 + special = false + upper = false +} + +resource "random_string" "s3_transform_suffix" { + length = 8 + special = false + upper = false +} + resource "aws_cloudwatch_event_rule" "lambda_trigger" { name = "lambda-scheduled-trigger" description = "Schedule to trigger the Lambda function" schedule_expression = "rate(30 minutes)" - - lifecycle { - create_before_destroy = true - } } resource "aws_cloudwatch_event_target" "extract_lambda_cw_event" { @@ -16,21 +30,28 @@ resource "aws_cloudwatch_event_target" "extract_lambda_cw_event" { } resource "aws_lambda_permission" "allow_eventbridge" { - statement_id = "AllowExecutionFromEventBridge" + statement_id = "AllowExecutionFromEventBridge${random_string.eventbridge_suffix.result}" action = "lambda:InvokeFunction" - function_name = aws_lambda_function.extract_lambda.function_name #replaced lambda name placeholder + function_name = aws_lambda_function.extract_lambda.function_name principal = "events.amazonaws.com" source_arn = aws_cloudwatch_event_rule.lambda_trigger.arn -} + lifecycle { + replace_triggered_by = [random_string.eventbridge_suffix] + } +} # below is step function 1 resource "aws_lambda_permission" "allow_s3_ingestion" { - statement_id = "AllowS3InvokeLambdaTransform" + statement_id = "AllowS3InvokeLambdaTransform${random_string.s3_ingestion_suffix.result}" action = "lambda:InvokeFunction" function_name = aws_lambda_function.transform_lambda.function_name #replaced lambda name placeholder principal = "s3.amazonaws.com" source_arn = aws_s3_bucket.extract_bucket.arn #replaced bucket name placeholder + + lifecycle { + replace_triggered_by = [random_string.s3_ingestion_suffix] + } } @@ -45,14 +66,16 @@ resource "aws_s3_bucket_notification" "extract_bucket_notification" { depends_on = [aws_lambda_permission.allow_s3_ingestion] } -###### - resource "aws_lambda_permission" "allow_s3_transform_bucket" { - statement_id = "AllowS3InvokeLambdaTransform" + statement_id = "AllowS3InvokeLambdaTransform${random_string.s3_transform_suffix.result}" action = "lambda:InvokeFunction" function_name = aws_lambda_function.transform_lambda.function_name #replaced lambda name placeholder principal = "s3.amazonaws.com" source_arn = aws_s3_bucket.transform_bucket.arn #replaced bucket name placeholder + + lifecycle { + replace_triggered_by = [random_string.s3_transform_suffix] + } } -- cgit v1.2.3 From 79d230f3c76609d32a3d8c553d64f37ce9fe6d09 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 15 Aug 2024 10:15:31 +0100 Subject: fix(tf): create lambda zips before referencing --- terraform/lambda.tf | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index fb0a666..72d1306 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -1,9 +1,14 @@ # Extract Lambda Function +data "archive_file" "extract_lambda_zip" { + type = "zip" + source_file = "${path.module}/../src/extract_lambda.py" + output_path = "${path.module}/../extract_function.zip" +} resource "aws_s3_object" "extract_lambda_code" { bucket = aws_s3_bucket.lambda_code_bucket.bucket key = "${var.extract_lambda_name}/extract_function.zip" - source = "${path.module}/../extract_function.zip" - etag = filemd5("${path.module}/../extract_function.zip") + source = data.archive_file.extract_lambda_zip.output_path + etag = filemd5(data.archive_file.extract_lambda_zip.output_path) } resource "aws_lambda_function" "extract_lambda" { @@ -22,11 +27,16 @@ resource "aws_lambda_function" "extract_lambda" { } # Transform Lambda Function +data "archive_file" "transform_lambda_zip" { + type = "zip" + source_file = "${path.module}/../src/transform_lambda.py" + output_path = "${path.module}/../transform_function.zip" +} resource "aws_s3_object" "transform_lambda_code" { bucket = aws_s3_bucket.lambda_code_bucket.bucket key = "${var.transform_lambda_name}/transform_function.zip" - source = "${path.module}/../transform_function.zip" - etag = filemd5("${path.module}/../transform_function.zip") + source = data.archive_file.transform_lambda_zip.output_path + etag = filemd5(data.archive_file.transform_lambda_zip.output_path) } resource "aws_lambda_function" "transform_lambda" { @@ -45,11 +55,16 @@ resource "aws_lambda_function" "transform_lambda" { } # Load Lambda Function +data "archive_file" "load_lambda_zip" { + type = "zip" + source_file = "${path.module}/../src/load_lambda.py" + output_path = "${path.module}/../load_function.zip" +} resource "aws_s3_object" "load_lambda_code" { bucket = aws_s3_bucket.lambda_code_bucket.bucket key = "${var.load_lambda_name}/load_function.zip" - source = "${path.module}/../load_function.zip" - etag = filemd5("${path.module}/../load_function.zip") + source = data.archive_file.load_lambda_zip.output_path + etag = filemd5(data.archive_file.load_lambda_zip.output_path) } resource "aws_lambda_function" "load_lambda" { -- cgit v1.2.3 From 733cb2eae5ab77c71c24747b5652ce135bb4efe4 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 15 Aug 2024 11:03:38 +0100 Subject: chore(gitignore): clean-up --- .gitignore | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index d1df545..cd44594 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,15 @@ +# Terraform *.tfstate *.tfstate.* *.tfvars *.tfvars.json .terraform.tfstate.lock.info -*.zip .terraform/ .terraform* + +# Output Files +*.zip log* + +# OS-Related Files .DS_Store -- cgit v1.2.3 From fe548561acc5e133e3bee4026aab85db2e511bcd Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Thu, 15 Aug 2024 13:51:53 +0100 Subject: wip: secrets manager pushing to merge with extract_lambda --- .gitignore | 1 + src/extract_lambda.py | 1 + src/secrets_manager.py | 48 ++++++++++++++++++++++++++++++++++++++++++++ test/test_secrets_manager.py | 34 +++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+) create mode 100644 src/secrets_manager.py create mode 100644 test/test_secrets_manager.py diff --git a/.gitignore b/.gitignore index d1df545..d164c3f 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ .terraform* log* .DS_Store +venv \ No newline at end of file diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 7d56c66..faa1d30 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -2,6 +2,7 @@ from pg8000.native import Connection, Error, DatabaseError, InterfaceError from dotenv import load_dotenv import os + load_dotenv() def extract(): diff --git a/src/secrets_manager.py b/src/secrets_manager.py new file mode 100644 index 0000000..c0fb61e --- /dev/null +++ b/src/secrets_manager.py @@ -0,0 +1,48 @@ +import boto3 +from botocore.exceptions import ClientError +import json + + +def sm_client(): + sm_client = boto3.client('secretsmanager') + yield sm_client + +def create_secret(sm_client, secret_name, cohort_id, user, password, host, database, port): + secret = { + "cohort_id": cohort_id, + "user": user, + "password": password, + "host": host, + "database": database, + "port": port + } + + response = sm_client.create_secret( + Name = secret_name, + SecretString = json.dumps(secret) + ) + + print(response) + return response + +def list_secret(sm_client): + response = sm_client.list_secrets() + secret_dict = response['SecretList'] + secret_names = [] + for items in secret_dict: + secret_names.append(items['Name']) + print(f'{len(secret_names)} secret(s) available') + for name in secret_names: + print(name) + return secret_names + +def retrieve_secrets(sm_client): + response = sm_client.get_secrets( + + ) + + + +#retrieve secret +#so lambda can access totesy db +#so lambda connect to the db and then retrieve the data \ No newline at end of file diff --git a/test/test_secrets_manager.py b/test/test_secrets_manager.py new file mode 100644 index 0000000..86533bc --- /dev/null +++ b/test/test_secrets_manager.py @@ -0,0 +1,34 @@ +from src.secrets_manager import sm_client, create_secret, list_secret +import boto3 +from moto import mock_aws +import json +import pytest +import os + +pytest.fixture(scope='class') +def mock_aws_credentials(): + """Mocked AWS Credentials for moto.""" + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURITY_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" + +@pytest.fixture(scope='class') +def mock_sm_client(mock_aws_credentials): + with mock_aws(): + yield boto3.client('secretsmanager') + + +def test_create_secret_stores_secrets(mock_sm_client): + cohort_id = "test_cohort_id" + user = "test_user_id" + password = "test_password" + host = "test_host" + database = "test_database" + port = "test_port" + + secret_name = "test_secret" + response = create_secret(mock_sm_client, secret_name, cohort_id, user, password, host, database, port) + + assert response['Name'] == secret_name \ No newline at end of file -- cgit v1.2.3 From 3ee88c87fbae7e9968c4fc7f9aae5e7f28581aad Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 15 Aug 2024 15:32:53 +0100 Subject: ci: separate python & terraform jobs --- .github/workflows/on-commit.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/on-commit.yml b/.github/workflows/on-commit.yml index 937aeaa..e4eb4f8 100644 --- a/.github/workflows/on-commit.yml +++ b/.github/workflows/on-commit.yml @@ -6,7 +6,7 @@ on: - 'main' jobs: - quality-checks: + python-quality-checks: runs-on: ubuntu-latest steps: - uses : actions/checkout@v4 @@ -25,6 +25,9 @@ jobs: - name : 'Python: Formatting' run: | black --check . + terraform-quality-checks: + runs-on: ubuntu-latest + steps: - name: 'Terraform: Setup' uses: hashicorp/setup-terraform@v3 with: @@ -37,4 +40,4 @@ jobs: run: terraform init -backend=false - name: 'Terraform: Validate' working-directory: ./terraform - run: terraform validate + run: terraform validate \ No newline at end of file -- cgit v1.2.3 From 6964625c65ae1552b8182891cf47997e480cce5e Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 15 Aug 2024 15:34:03 +0100 Subject: fix(ci): correct terraform folder path --- .github/workflows/on-commit.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/on-commit.yml b/.github/workflows/on-commit.yml index e4eb4f8..355729e 100644 --- a/.github/workflows/on-commit.yml +++ b/.github/workflows/on-commit.yml @@ -33,11 +33,11 @@ jobs: with: terraform_version: latest - name: 'Terraform: Formatting' - working-directory: ./terraform + working-directory: terraform run: terraform fmt -check -recursive - name: 'Terraform: Initialise' - working-directory: ./terraform + working-directory: terraform run: terraform init -backend=false - name: 'Terraform: Validate' - working-directory: ./terraform + working-directory: terraform run: terraform validate \ No newline at end of file -- cgit v1.2.3 From e5f2d8c98dd029bfb9926c35002abcf998510cf7 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 15 Aug 2024 15:37:26 +0100 Subject: fix(ci): add missing checkout action --- .github/workflows/on-commit.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/on-commit.yml b/.github/workflows/on-commit.yml index 355729e..a4e66bb 100644 --- a/.github/workflows/on-commit.yml +++ b/.github/workflows/on-commit.yml @@ -28,6 +28,7 @@ jobs: terraform-quality-checks: runs-on: ubuntu-latest steps: + - uses : actions/checkout@v4 - name: 'Terraform: Setup' uses: hashicorp/setup-terraform@v3 with: -- cgit v1.2.3 From 47f5abae5b1b033a805b08c2a00d7df0bb0dcd97 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 15 Aug 2024 15:41:35 +0100 Subject: test(ci): add continue-on-error for debugging --- .github/workflows/on-commit.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/on-commit.yml b/.github/workflows/on-commit.yml index a4e66bb..fd9ffb8 100644 --- a/.github/workflows/on-commit.yml +++ b/.github/workflows/on-commit.yml @@ -18,13 +18,16 @@ jobs: run: | python -m pip install --upgrade pip pip install flake8 pylint black bandit safety + continue-on-error: true - name : 'Python: Linting' run: | flake8 . find . -name "*.py" | xargs pylint + continue-on-error: true - name : 'Python: Formatting' run: | black --check . + continue-on-error: true terraform-quality-checks: runs-on: ubuntu-latest steps: @@ -36,9 +39,12 @@ jobs: - name: 'Terraform: Formatting' working-directory: terraform run: terraform fmt -check -recursive + continue-on-error: true - name: 'Terraform: Initialise' working-directory: terraform run: terraform init -backend=false + continue-on-error: true - name: 'Terraform: Validate' working-directory: terraform - run: terraform validate \ No newline at end of file + run: terraform validate + continue-on-error: true \ No newline at end of file -- cgit v1.2.3 From cc13dc8d170d8c60dbb92e4e802a854bbdf81d5b Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 15 Aug 2024 16:03:00 +0100 Subject: test(ci): add terraform deploy job --- .github/workflows/deploy.yml | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .github/workflows/deploy.yml diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..6674373 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,37 @@ +name: deploy-terraform + +on: + push: + branches: + - main # Adjust the branch based on our deployment strategy + +jobs: + deploy-terraform: + name: Deploy Terraform + runs-on: ubuntu-latest + environment: test-env + steps: + - name: Checkout Repo + uses: actions/checkout@v4 + + - name: Install Terraform + uses: hashicorp/setup-terraform@v3 + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ secrets.AWS_REGION }} + + - name: Terraform Init + working-directory: terraform + run: terraform init + + - name: Terraform Plan + working-directory: terraform + run: terraform plan + + - name: Terraform Apply + working-directory: terraform + run: terraform apply --auto-approve \ No newline at end of file -- cgit v1.2.3 From fc8e61c0e58df57195c6a33852a0a17ba34322c6 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 15 Aug 2024 16:05:03 +0100 Subject: fix(ci): amend to trigger on commit to test-ci/... --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 6674373..372d0b3 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -3,7 +3,7 @@ name: deploy-terraform on: push: branches: - - main # Adjust the branch based on our deployment strategy + - test-ci/** # Adjust the branch based on our deployment strategy jobs: deploy-terraform: -- cgit v1.2.3 From c9bf342c8f6038a3f5397bfc8c53d251f27e7eec Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Thu, 15 Aug 2024 16:45:47 +0100 Subject: procefss_and_upload_tables test in progress --- requirements.txt | 30 ++++++++++++++++++++++++++++ src/extract_lambda.py | 30 +++++++++++++++++++--------- tests/dummy_identical.csv | 4 ++++ tests/test_extract_lambda.py | 47 +++++++++++++++++++++----------------------- 4 files changed, 77 insertions(+), 34 deletions(-) create mode 100644 requirements.txt create mode 100644 tests/dummy_identical.csv diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..6f383f9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,30 @@ +asn1crypto==1.5.1 +boto3==1.34.159 +botocore==1.34.159 +certifi==2024.7.4 +cffi==1.17.0 +charset-normalizer==3.3.2 +cryptography==43.0.0 +idna==3.7 +iniconfig==2.0.0 +Jinja2==3.1.4 +jmespath==1.0.1 +MarkupSafe==2.1.5 +moto==5.0.12 +packaging==24.1 +pg8000==1.31.2 +pluggy==1.5.0 +pycparser==2.22 +pytest==8.3.2 +pytest-mock==3.14.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +PyYAML==6.0.2 +requests==2.32.3 +responses==0.25.3 +s3transfer==0.10.2 +scramp==1.4.5 +six==1.16.0 +urllib3==2.2.2 +Werkzeug==3.0.3 +xmltodict==0.13.0 \ No newline at end of file diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 56b47a6..fb2d7e8 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -6,6 +6,7 @@ from botocore.exceptions import ClientError import logging import json from datetime import datetime +import re logger = logging.getLogger() @@ -117,9 +118,16 @@ def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): in the existing_files dictionary with the same name. If it finds any changes to files, or new tables/files it uploads them to the s3 bucket """ - + ## NEW CODE + all_datetimes = [] + for file_names in existing_files.keys(): + datetime_str_on_s3 = ''.join(re.search(r'\/(.+/).+_(.+)\.csv',file_names).group(1,2)) + all_datetimes.append(datetime.strptime(datetime_str_on_s3, '%Y/%m/%d/%H:%M:%S')) + latest_timestamp = max(all_datetimes) + ## END OF NEW CODE + tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';") - + print(tables) for table in tables: table_name = table[0] rows = db.run(f"SELECT * FROM {table_name};") @@ -128,17 +136,21 @@ def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): csv_file_path = f"/tmp/{table_name}.csv" with open(csv_file_path, "w", newline='') as file: writer = csv.writer(file) - column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] + #column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] + column_names = [col_name[0] for col_name in db.run(f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';")] writer.writerow(column_names) writer.writerows(rows) - - s3_key = f"{table_name}/{datetime.today().year}/{datetime.today().month}/{datetime.today().day}/{table_name}_{datetime.now().strftime('%H:%M:%S')}.csv" + s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') new_csv_content = open(csv_file_path, "r").read() - - - if s3_key not in existing_files or existing_files[s3_key] != new_csv_content: + ## NEW CODE + latest_s3_object_key = datetime.strftime(latest_timestamp,f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') + ## END OF NEW CODE + if existing_files[latest_s3_object_key] != new_csv_content: try: client.upload_file(csv_file_path, 'extract_bucket', s3_key) logger.info(f"Uploaded {s3_key} to S3.") except ClientError as e: - logger.error(f'Error uploading to S3: {e}') \ No newline at end of file + logger.error(f'Error uploading to S3: {e}') + else: + logger.info(f"No new data.") + \ No newline at end of file diff --git a/tests/dummy_identical.csv b/tests/dummy_identical.csv new file mode 100644 index 0000000..fdd8993 --- /dev/null +++ b/tests/dummy_identical.csv @@ -0,0 +1,4 @@ +Food_type,Flavour,Colour +Vegetable,Sour,Green +Berry,Sweet,Red + diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index 74d7e2c..e94a8a4 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -1,7 +1,7 @@ import pytest import boto3 from moto import mock_aws -from unittest.mock import patch +from unittest.mock import patch, MagicMock from unittest import TestCase from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables import os @@ -81,32 +81,29 @@ class TestConnectToDatabase: with pytest.raises(DBConnectionException): connect_to_database() assert 'Interface error' in caplog.text - +''' class TestProcessAndUploadTables: - def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog, mocker): + def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog): logger = logging.getLogger() logger.info('Testing now.') caplog.set_level(logging.ERROR) - - with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: - mock_db = connect_to_database() - # need to add a table - s3_key = 'dummy/2024/8/14/dummy_16:46:30.txt' - mock_existing_files = mocker.Mock(return_value={s3_key: 'This is a test file.' }) + #### + queries = ["SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';", + "SELECT * FROM Fruits;", + "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits'"] + return_values = [[['Fruits']], + [['Vegetable','Sour','Green'],['Berry','Sweet','Red']], + [['Food_type'],['Flavour'],['Colour']]] + vals = dict(zip(queries,return_values)) + + #### + with patch('src.extract_lambda.connect_to_database') as mock_db: + mock_db().run.side_effects = return_values + s3_key = 'Fruits/2024/08/15/Fruits_16:46:30.csv' + existing_files = {s3_key: 'Food_type,Flavour,Colour\nFruit,Sour,Green\nBerry,Sweet,Red'} s3_client.create_bucket(Bucket='extract_bucket', - CreateBucketConfiguration={ - 'LocationConstraint': 'eu-west-2' - }) - s3_client.upload_file('tests/dummy.txt', 'extract_bucket', s3_key) - process_and_upload_tables(mock_db, mock_existing_files, client=s3_client) - - assert 'Error uploading to S3' in caplog.text - -#@pytest.mark.describe("Helpers") -# @pytest.mark.it("Query processor returns correctly formatted dict") -# def test_process_query(): -# with patch("src.api.helpers.get_db_connection") as mock_conn: -# mock_conn().run.side_effect = db_data -# mock_conn().columns = sample_headers -# result = process_query("test query") -# assert result == sample_result \ No newline at end of file + CreateBucketConfiguration={'LocationConstraint': 'eu-west-2'}) + s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key) + process_and_upload_tables(mock_db(), existing_files, client=s3_client) + assert 'No new data.' in caplog.text +''' \ No newline at end of file -- cgit v1.2.3 From 486fb62af5568a70e22ded622072883758e9ffdf Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Thu, 15 Aug 2024 17:25:03 +0100 Subject: fix(tf): resolve naming issues with resources --- terraform/rds.tf | 42 ++++++++++++++++++++++-------------------- terraform/vars.tf | 4 ++-- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/terraform/rds.tf b/terraform/rds.tf index 4b25c5f..88783b7 100644 --- a/terraform/rds.tf +++ b/terraform/rds.tf @@ -2,9 +2,9 @@ data "aws_availability_zones" "available" {} module "vpc" { source = "terraform-aws-modules/vpc/aws" - version = "2.77.0" + version = "5.12.1" - name = "${var.project_name}" + name = var.project_name cidr = "10.0.0.0/16" azs = data.aws_availability_zones.available.names public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"] @@ -13,7 +13,7 @@ module "vpc" { } resource "aws_db_subnet_group" "Terrific-Totes-sub-gr" { - name = "TT-db-subnet" + name = "tt-db-subnet" subnet_ids = module.vpc.public_subnets tags = { @@ -45,7 +45,7 @@ resource "aws_security_group" "rds" { } resource "aws_db_parameter_group" "Terrific-Totes-param-gr" { - name = "TT-db-param" + name = "tt-db-param" family = "postgres14" parameter { @@ -54,25 +54,27 @@ resource "aws_db_parameter_group" "Terrific-Totes-param-gr" { } } -resource "aws_db_instance" "Terrific-Totes-rds" { - db_name = "${var.project_name}" - instance_class = "db.t3.micro" - allocated_storage = 5 - engine = "postgres" - engine_version = "14.1" - username = "user credentials for the root user" # we could use .env here - password = "user password for the root user" # we could use .env here +resource "aws_db_instance" "terrific-totes-rds" { + db_name = var.project_name + instance_class = "db.t3.micro" + allocated_storage = 5 + engine = "postgres" + engine_version = "14.10" + username = "totes" + password = "totes123" + # username = "user credentials for the root user" # we could use .env here + # password = "user password for the root user" # we could use .env here ### alternatively to providing username nad password we can specify: -# resource "aws_kms_key" "example_key" { -# description = "Example KMS Key" -# } -# within the resource: -# manage_master_user_password = true -# master_user_secret_kms_key_id = aws_kms_key.example.key_id -# } + # resource "aws_kms_key" "example_key" { + # description = "Example KMS Key" + # } + # within the resource: + # manage_master_user_password = true + # master_user_secret_kms_key_id = aws_kms_key.example.key_id + # } db_subnet_group_name = aws_db_subnet_group.Terrific-Totes-sub-gr.name vpc_security_group_ids = [aws_security_group.rds.id] parameter_group_name = aws_db_parameter_group.Terrific-Totes-param-gr.name publicly_accessible = false skip_final_snapshot = true -} \ No newline at end of file +} diff --git a/terraform/vars.tf b/terraform/vars.tf index d5cdafb..3c88731 100644 --- a/terraform/vars.tf +++ b/terraform/vars.tf @@ -29,8 +29,8 @@ variable "load_lambda_name" { } variable "project_name" { - type = string - default = "Terrific-Totes" + type = string + default = "tt" } data "aws_caller_identity" "current" {} -- cgit v1.2.3 From 610261fec06ab3b6106465960d6935dd9df85df0 Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Fri, 16 Aug 2024 09:46:53 +0100 Subject: Secrets manager integration into the extract lambda reviewed. --- src/extract_lambda.py | 29 +++++++++-------- tests/test_secrets_manager.py | 73 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 13 deletions(-) create mode 100644 tests/test_secrets_manager.py diff --git a/src/extract_lambda.py b/src/extract_lambda.py index fb2d7e8..3055f63 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -1,5 +1,4 @@ -from pg8000.native import Connection, DatabaseError, InterfaceError -from dotenv import dotenv_values +from pg8000.native import Connection, InterfaceError import boto3 import csv from botocore.exceptions import ClientError @@ -42,31 +41,35 @@ def lambda_handler(event, context): 'statusCode': 200, 'body': json.dumps('CSV files processed and uploaded successfully.') } - except Exception as e: logger.error(f'Error: {e}') return { 'statusCode': 500, 'body': json.dumps('Internal server error.') } - finally: - if db: db.close() -def get_config(path: str = ".env") -> dict: - return dotenv_values(path) +def retrieve_secrets(sm_client=boto3.client('secretsmanager'), secret_name='bentley-secrets'): + try: + response = sm_client.get_secret_value(SecretId=secret_name) + if 'SecretString' in response: + secret = json.loads(response['SecretString']) + return secret + except ClientError as e: + logger.error(f'Could not retrieve secrets: {e}') + raise e def connect_to_database() -> Connection: try: - config = get_config() - host = config["host"] - port = config["port"] - user = config["user"] - password = config["password"] - database = config["database"] + secrets = retrieve_secrets() + host = secrets["host"] + port = secrets["port"] + user = secrets["user"] + password = secrets["password"] + database = secrets["database"] return Connection( database=database, diff --git a/tests/test_secrets_manager.py b/tests/test_secrets_manager.py new file mode 100644 index 0000000..a30be86 --- /dev/null +++ b/tests/test_secrets_manager.py @@ -0,0 +1,73 @@ +from src.secrets_manager import sm_client, retrieve_secrets +import boto3 +import botocore.exceptions +from moto import mock_aws +import json +import pytest +import os + +@pytest.fixture(scope='function') +def aws_credentials(): + """Mocked AWS Credentials for moto.""" + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURITY_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" + +@pytest.fixture(scope='function') +def mock_sm_client(aws_credentials): + with mock_aws(): + yield boto3.client("secretsmanager") + +@pytest.fixture(scope='function') +def mock_store_secret(mock_sm_client): + secret = { + "cohort_id": "test_cohort_id", + "user": "test_user_id", + "password": "test_password", + "host": "test_host", + "database": "test_database", + "port": "test_port" + } + + secret_name = "test_secret" + + response = mock_sm_client.create_secret(Name=secret_name, SecretString=json.dumps(secret)) + + return response + +def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret): + secret_name = "test_secret" + + result = retrieve_secrets(mock_sm_client, secret_name) + + assert isinstance(result, dict) + +def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_store_secret): + + secret_name = "test_secret" + + result = retrieve_secrets(mock_sm_client, secret_name) + + assert result["cohort_id"] == "test_cohort_id" + assert result["user"] == "test_user_id" + assert result["password"] == "test_password" + assert result["host"] == "test_host" + assert result["database"] == "test_database" + assert result["port"] == "test_port" + +def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type(mock_sm_client): + secret_name = [1, 2, 3] + + + with pytest.raises(botocore.exceptions.ParamValidationError) as error: + retrieve_secrets(mock_sm_client, secret_name) + + +def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist(mock_sm_client, mock_store_secret): + secret_name = 'test_secret_2' + + + with pytest.raises(botocore.exceptions.ClientError) as error: + retrieve_secrets(mock_sm_client, secret_name) \ No newline at end of file -- cgit v1.2.3 From 938ddda10ff2f7d5360ca0a939fa2f16d6beb09d Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Fri, 16 Aug 2024 10:01:06 +0100 Subject: extract bucket name retrieval helper function and replace the bucket name placeholders --- src/extract_lambda.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 3055f63..f4c0c1d 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -82,9 +82,12 @@ def connect_to_database() -> Connection: logger.error(f'Interface error: {i}') raise DBConnectionException("Failed to connect to database") +def extract_bucket(client=boto3.client('s3')): + response = client.list_buckets() + extract_bucket_filter = [bucket['Name'] for bucket in response['Buckets'] if 'extract' in bucket['Name']] + return extract_bucket_filter[0] - -def list_existing_s3_files(bucket_name='extract_bucket', client=boto3.client('s3')): +def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client('s3')): """Creates a dictionary and populates it with the results of listing the contents of the s3 bucket, then returns the populated dictionary @@ -93,7 +96,7 @@ def list_existing_s3_files(bucket_name='extract_bucket', client=boto3.client('s3 existing_files = {} try: - response = client.list_objects_v2(Bucket='extract_bucket') + response = client.list_objects_v2(Bucket=bucket_name) if 'Contents' in response: for obj in response['Contents']: @@ -150,7 +153,7 @@ def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): ## END OF NEW CODE if existing_files[latest_s3_object_key] != new_csv_content: try: - client.upload_file(csv_file_path, 'extract_bucket', s3_key) + client.upload_file(csv_file_path, extract_bucket(), s3_key) logger.info(f"Uploaded {s3_key} to S3.") except ClientError as e: logger.error(f'Error uploading to S3: {e}') -- cgit v1.2.3 From c937a7e098d818dadbc769b3c9eb9fd93cc05af2 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 10:01:28 +0100 Subject: docs: rm DEVNOTES.md basically redundant now --- DEVNOTES.md | 100 ------------------------------------------------------------ 1 file changed, 100 deletions(-) delete mode 100644 DEVNOTES.md diff --git a/DEVNOTES.md b/DEVNOTES.md deleted file mode 100644 index 00b4ddd..0000000 --- a/DEVNOTES.md +++ /dev/null @@ -1,100 +0,0 @@ -# Workflow - -## References - -https://nvie.com/posts/a-successful-git-branching-model/ \ -https://learn.microsoft.com/en-us/azure/devops/repos/git/merging-with-squash?view=azure-devops - - -## Branching - -*Based off GitFlow but slightly modified* - -- There are two main branches - - `main` - production-ready code - - `development` - integration branch for features - - `staging` - represents the current staging state -- In addition, there are additional branches - - Feature branches - for new features and non-urgent bugfixes - - Hotfix branches - probably won't be used but for critical bugs in production (this is what testing should prevent) - - Release branches - for preparation of production releases - -- Feature branches - e.g. `feature/short-description` -- Bugfix branches - e.g. `bugfix/short-description` -- Hotfix branches - e.g. `hotfix/short-description` -- Release branches - e.g. `release/vX.Y.Z` - -### Examples -``` -feature/add-data-extractor -bugfix/fix-s3-upload-error -hotfix/security-patch -release/v1.0.0 -``` - -## Environments - -1. Development - where active development and initial testing occur -2. Staging - for integration testing and final checks before production -3. Production - live and stable environment - -## Deployment - -1. `main` - represents the current production state -2. `develop` - represents the integration branch for features and non-urgent fixes -3. `staging` - represents the current staging state - -## Staging Flow - -1. Create feature branches from `develop` & merge completed features back into `develop` -2. When the `develop` branch is ready for testing, create a `staging` branch from `develop` -3. Deploy the `staging` branch to the staging environment and perform our unit-tests -4. If staging tests pass, create a `release/vX.Y.Z` branch from `staging` -5. Make any final adjustments in the `release/vX.Y.Z` branch -6. Once we have approved the changes in the `release/vX.Y.Z` branch, merge into `main` -7. Tag the release in `main` - -### Notes - -- No new features should be included in the release branches and any new features should be merged into `develop` for the next release cycle - -## Commit Messages - -Please follow the conventional commits specification: - -``` -[optional scope]: - - - -[optional footer(s)] -``` - -### Types -- feat: new features -- fix: bugfixes -- docs: documentation-only changes -- style: changes that do not affect the meaning of the code -- refactor: code changes that neither fix bugs nor adds features -- perf: code changes that improve performance -- test: adding tests or correcting existing tests -- chore: changes to build process or tools/libraries (probably not needed) -- infra: changes to infrastructure configuration (e.g. Terraform) - -### Examples -``` -feat(extract): add automatic scheduling for data ingestion -docs: update README with project setup instructions -``` - -Configuration files for things such as Terraform isn't native to Conventional Commits, but we can add our own: - -``` -infra(tf): update S3 bucket policy -``` - -If the Terraform change involves a fix, you may combine `fix` and `infra`: - -``` -fix(infra): ... -``` -- cgit v1.2.3 From 861fd5fe8303c6558c7763477c89dc98fff23c57 Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Fri, 16 Aug 2024 10:20:14 +0100 Subject: wip: pushing the ci-cd-branch to test terraform infra --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 372d0b3..922daee 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -3,7 +3,7 @@ name: deploy-terraform on: push: branches: - - test-ci/** # Adjust the branch based on our deployment strategy + - ci-cd-branch # Adjust the branch based on our deployment strategy jobs: deploy-terraform: -- cgit v1.2.3 From d25f05ba140cb85847ca604bef0e68b76a17ba62 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 10:34:50 +0100 Subject: docs: add draft summary section --- README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8ae0cb3..203482e 100644 --- a/README.md +++ b/README.md @@ -1 +1,14 @@ -# de-project-bentley \ No newline at end of file +# ToteSys - Data Engineering Project + +# Summary +The project aims to implement a data platform that can extract data from an +operational database, archive it in a data lake, and make it easily accessible +within a remodelled OLAP data warehouse. + +The solution showcases our skills in: + +- Python +- PostgreSQL +- Database modelling +- Amazon Web Services (AWS) +- Agile methodologies \ No newline at end of file -- cgit v1.2.3 From 9809e7ca1351d7b27f62b3c7c74db7124cab5dc9 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 10:40:00 +0100 Subject: docs: add draft main objective section --- README.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 203482e..e55cb16 100644 --- a/README.md +++ b/README.md @@ -11,4 +11,14 @@ The solution showcases our skills in: - PostgreSQL - Database modelling - Amazon Web Services (AWS) -- Agile methodologies \ No newline at end of file +- Agile methodologies + +# Main Objective + +Our goal is to create a reliable ETL (Extract, Transform, Load) pipeline that +can: + +1. Extract the data from the `totesys` operational database +2. Store the data in AWS S3 buckets, that will form our data lake +3. Transform the data into a suitable schema for the data warehouse +4. Load the data into the data warehouse hosted on AWS \ No newline at end of file -- cgit v1.2.3 From 37eb3bb7974904614867c7d0c2d4f6eccb39f22e Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 10:41:01 +0100 Subject: docs(main_obj): clarify data being loaded into data warehouse --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e55cb16..9c7baee 100644 --- a/README.md +++ b/README.md @@ -21,4 +21,4 @@ can: 1. Extract the data from the `totesys` operational database 2. Store the data in AWS S3 buckets, that will form our data lake 3. Transform the data into a suitable schema for the data warehouse -4. Load the data into the data warehouse hosted on AWS \ No newline at end of file +4. Load the transformed data into the data warehouse hosted on AWS \ No newline at end of file -- cgit v1.2.3 From 0c42e8f165e0f98a6c16252e841432922467ef94 Mon Sep 17 00:00:00 2001 From: Ellie Date: Fri, 16 Aug 2024 10:51:16 +0100 Subject: add lambda layer --- terraform/lambda.tf | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 72d1306..658b8c8 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -81,3 +81,38 @@ resource "aws_lambda_function" "load_lambda" { depends_on = [aws_s3_object.load_lambda_code] } + +locals { + layer_dir = "${path.module}/../python" + requirements = "${path.module}/../requirements.txt" + layer_zip = "${path.module}/../layer.zip" +} + +resource "null_resource" "prepare_layer" { + triggers = { + requirements_hash = filesha1(local.requirements) + } + provisioner "local-exec" { + command = < Date: Fri, 16 Aug 2024 11:09:59 +0100 Subject: docs: add draft key features section --- README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9c7baee..0bf6b9d 100644 --- a/README.md +++ b/README.md @@ -21,4 +21,17 @@ can: 1. Extract the data from the `totesys` operational database 2. Store the data in AWS S3 buckets, that will form our data lake 3. Transform the data into a suitable schema for the data warehouse -4. Load the transformed data into the data warehouse hosted on AWS \ No newline at end of file +4. Load the transformed data into the data warehouse hosted on AWS + +# Key Features + +We aim for the project to have certain features. Some are more prioritised than +others. + +- [ ] Automated data ingestion from `totesys` db +- [ ] Data storage for ingested and processed data in S3 buckets +- [ ] Data transformation for data warehouse schema +- [ ] Automated data loading into the data warehouse schema +- [ ] Logging and monitoring with CloudWatch +- [ ] Notifications for errors and successful runs (e.g. successful ingestion) +- [ ] Visualisation of warehouse data \ No newline at end of file -- cgit v1.2.3 From 24dd35f4bc6a0b8934f09b320f73bc88c6f68f1f Mon Sep 17 00:00:00 2001 From: Ellie Date: Fri, 16 Aug 2024 12:19:54 +0100 Subject: comment out rds.tf to increases tf speed --- terraform/rds.tf | 138 +++++++++++++++++++++++++++---------------------------- 1 file changed, 69 insertions(+), 69 deletions(-) diff --git a/terraform/rds.tf b/terraform/rds.tf index 88783b7..d1b4959 100644 --- a/terraform/rds.tf +++ b/terraform/rds.tf @@ -1,80 +1,80 @@ -data "aws_availability_zones" "available" {} +# data "aws_availability_zones" "available" {} -module "vpc" { - source = "terraform-aws-modules/vpc/aws" - version = "5.12.1" +# module "vpc" { +# source = "terraform-aws-modules/vpc/aws" +# version = "5.12.1" - name = var.project_name - cidr = "10.0.0.0/16" - azs = data.aws_availability_zones.available.names - public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"] - enable_dns_hostnames = true - enable_dns_support = true -} +# name = var.project_name +# cidr = "10.0.0.0/16" +# azs = data.aws_availability_zones.available.names +# public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"] +# enable_dns_hostnames = true +# enable_dns_support = true +# } -resource "aws_db_subnet_group" "Terrific-Totes-sub-gr" { - name = "tt-db-subnet" - subnet_ids = module.vpc.public_subnets +# resource "aws_db_subnet_group" "Terrific-Totes-sub-gr" { +# name = "tt-db-subnet" +# subnet_ids = module.vpc.public_subnets - tags = { - Name = "${var.project_name}" - } -} +# tags = { +# Name = "${var.project_name}" +# } +# } -resource "aws_security_group" "rds" { - name = "${var.project_name}-rds" - vpc_id = module.vpc.vpc_id +# resource "aws_security_group" "rds" { +# name = "${var.project_name}-rds" +# vpc_id = module.vpc.vpc_id - ingress { - from_port = 5432 - to_port = 5432 - protocol = "tcp" - cidr_blocks = ["0.0.0.0/0"] - } +# ingress { +# from_port = 5432 +# to_port = 5432 +# protocol = "tcp" +# cidr_blocks = ["0.0.0.0/0"] +# } - egress { - from_port = 5432 - to_port = 5432 - protocol = "tcp" - cidr_blocks = ["0.0.0.0/0"] - } +# egress { +# from_port = 5432 +# to_port = 5432 +# protocol = "tcp" +# cidr_blocks = ["0.0.0.0/0"] +# } - tags = { - Name = "${var.project_name}-rds" - } -} +# tags = { +# Name = "${var.project_name}-rds" +# } +# } -resource "aws_db_parameter_group" "Terrific-Totes-param-gr" { - name = "tt-db-param" - family = "postgres14" +# resource "aws_db_parameter_group" "Terrific-Totes-param-gr" { +# name = "tt-db-param" +# family = "postgres14" - parameter { - name = "log_connections" - value = "1" - } -} +# parameter { +# name = "log_connections" +# value = "1" +# } +# } -resource "aws_db_instance" "terrific-totes-rds" { - db_name = var.project_name - instance_class = "db.t3.micro" - allocated_storage = 5 - engine = "postgres" - engine_version = "14.10" - username = "totes" - password = "totes123" - # username = "user credentials for the root user" # we could use .env here - # password = "user password for the root user" # we could use .env here - ### alternatively to providing username nad password we can specify: - # resource "aws_kms_key" "example_key" { - # description = "Example KMS Key" - # } - # within the resource: - # manage_master_user_password = true - # master_user_secret_kms_key_id = aws_kms_key.example.key_id - # } - db_subnet_group_name = aws_db_subnet_group.Terrific-Totes-sub-gr.name - vpc_security_group_ids = [aws_security_group.rds.id] - parameter_group_name = aws_db_parameter_group.Terrific-Totes-param-gr.name - publicly_accessible = false - skip_final_snapshot = true -} +# resource "aws_db_instance" "terrific-totes-rds" { +# db_name = var.project_name +# instance_class = "db.t3.micro" +# allocated_storage = 5 +# engine = "postgres" +# engine_version = "14.10" +# username = "totes" +# password = "totes123" +# # username = "user credentials for the root user" # we could use .env here +# # password = "user password for the root user" # we could use .env here +# ### alternatively to providing username nad password we can specify: +# # resource "aws_kms_key" "example_key" { +# # description = "Example KMS Key" +# # } +# # within the resource: +# # manage_master_user_password = true +# # master_user_secret_kms_key_id = aws_kms_key.example.key_id +# # } +# db_subnet_group_name = aws_db_subnet_group.Terrific-Totes-sub-gr.name +# vpc_security_group_ids = [aws_security_group.rds.id] +# parameter_group_name = aws_db_parameter_group.Terrific-Totes-param-gr.name +# publicly_accessible = false +# skip_final_snapshot = true +# } -- cgit v1.2.3 From 3d56751d93eeb5ef6cef1f44dd54ee38fcd1fe3c Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Fri, 16 Aug 2024 12:20:54 +0100 Subject: wip: change env line 14 to production --- .github/workflows/deploy.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 922daee..bd9df57 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -5,15 +5,17 @@ on: branches: - ci-cd-branch # Adjust the branch based on our deployment strategy + jobs: deploy-terraform: name: Deploy Terraform runs-on: ubuntu-latest - environment: test-env + #needs: run-checks (must ref on-commit.yml file) + environment: production steps: - name: Checkout Repo uses: actions/checkout@v4 - + - name: Install Terraform uses: hashicorp/setup-terraform@v3 -- cgit v1.2.3 From a217da60ba75a226bf72a9fc680c4cbabe883aea Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 12:53:22 +0100 Subject: docs: add empty sections --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0bf6b9d..6bc75dc 100644 --- a/README.md +++ b/README.md @@ -34,4 +34,10 @@ others. - [ ] Automated data loading into the data warehouse schema - [ ] Logging and monitoring with CloudWatch - [ ] Notifications for errors and successful runs (e.g. successful ingestion) -- [ ] Visualisation of warehouse data \ No newline at end of file +- [ ] Visualisation of warehouse data + +# Test Coverage +TBA + +# Contributors +TBA \ No newline at end of file -- cgit v1.2.3 From dd68d948dec97fedfcaa89806523975ad1224c71 Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Fri, 16 Aug 2024 13:48:22 +0100 Subject: refactoring for extract lambda to filter by last updated and if not empty write it s3 --- .gitignore | 2 ++ src/extract_lambda.py | 26 +++++++++++--------------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index ca15434..bceab93 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ __pycache__/ # OS-Related Files .DS_Store + +*venv* diff --git a/src/extract_lambda.py b/src/extract_lambda.py index f4c0c1d..e348bef 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -136,9 +136,9 @@ def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): print(tables) for table in tables: table_name = table[0] - rows = db.run(f"SELECT * FROM {table_name};") - + rows = db.run(f"SELECT * FROM {table_name} WHERE last_updated >= {datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')};") + if rows: csv_file_path = f"/tmp/{table_name}.csv" with open(csv_file_path, "w", newline='') as file: writer = csv.writer(file) @@ -147,16 +147,12 @@ def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): writer.writerow(column_names) writer.writerows(rows) s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') - new_csv_content = open(csv_file_path, "r").read() - ## NEW CODE - latest_s3_object_key = datetime.strftime(latest_timestamp,f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') - ## END OF NEW CODE - if existing_files[latest_s3_object_key] != new_csv_content: - try: - client.upload_file(csv_file_path, extract_bucket(), s3_key) - logger.info(f"Uploaded {s3_key} to S3.") - except ClientError as e: - logger.error(f'Error uploading to S3: {e}') - else: - logger.info(f"No new data.") - \ No newline at end of file + + try: + client.upload_file(csv_file_path, extract_bucket(), s3_key) + logger.info(f"Uploaded {s3_key} to S3.") + except ClientError as e: + logger.error(f'Error uploading to S3: {e}') + else: + logger.info(f"No new data.") + \ No newline at end of file -- cgit v1.2.3 From c284df39ed7735d736f4fe0f2571ba846b8f6315 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Fri, 16 Aug 2024 12:51:02 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in dd68d94 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/47 --- src/extract_lambda.py | 130 +++++++++++++++++++++++++++----------------------- 1 file changed, 71 insertions(+), 59 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index e348bef..323d04a 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -20,48 +20,49 @@ class DBConnectionException(Exception): self.message = str(e) super().__init__(self.message) + def lambda_handler(event, context): """This lambda function connects to the Totesys database, lists the contents of the ingestion bucket, - and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them - it uses 3 helper functions to achieve these 3 functionalities + and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them + it uses 3 helper functions to achieve these 3 functionalities """ try: db = connect_to_database() existing_files = list_existing_s3_files() any_changes = process_and_upload_tables(db, existing_files) - + if not any_changes: logger.info("No changes detected in the database.") return { - 'statusCode': 200, - 'body': json.dumps('No changes detected, no CSV files were uploaded.') + "statusCode": 200, + "body": json.dumps("No changes detected, no CSV files were uploaded."), } else: return { - 'statusCode': 200, - 'body': json.dumps('CSV files processed and uploaded successfully.') + "statusCode": 200, + "body": json.dumps("CSV files processed and uploaded successfully."), } except Exception as e: - logger.error(f'Error: {e}') - return { - 'statusCode': 500, - 'body': json.dumps('Internal server error.') - } + logger.error(f"Error: {e}") + return {"statusCode": 500, "body": json.dumps("Internal server error.")} finally: if db: db.close() -def retrieve_secrets(sm_client=boto3.client('secretsmanager'), secret_name='bentley-secrets'): +def retrieve_secrets( + sm_client=boto3.client("secretsmanager"), secret_name="bentley-secrets" +): try: response = sm_client.get_secret_value(SecretId=secret_name) - if 'SecretString' in response: - secret = json.loads(response['SecretString']) + if "SecretString" in response: + secret = json.loads(response["SecretString"]) return secret except ClientError as e: - logger.error(f'Could not retrieve secrets: {e}') + logger.error(f"Could not retrieve secrets: {e}") raise e + def connect_to_database() -> Connection: try: secrets = retrieve_secrets() @@ -72,87 +73,98 @@ def connect_to_database() -> Connection: database = secrets["database"] return Connection( - database=database, - user=user, - password=password, - host=host, - port=port + database=database, user=user, password=password, host=host, port=port ) except InterfaceError as i: - logger.error(f'Interface error: {i}') + logger.error(f"Interface error: {i}") raise DBConnectionException("Failed to connect to database") -def extract_bucket(client=boto3.client('s3')): + +def extract_bucket(client=boto3.client("s3")): response = client.list_buckets() - extract_bucket_filter = [bucket['Name'] for bucket in response['Buckets'] if 'extract' in bucket['Name']] + extract_bucket_filter = [ + bucket["Name"] for bucket in response["Buckets"] if "extract" in bucket["Name"] + ] return extract_bucket_filter[0] -def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client('s3')): - """Creates a dictionary and populates it with the - results of listing the contents of the s3 bucket, then - returns the populated dictionary + +def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client("s3")): + """Creates a dictionary and populates it with the + results of listing the contents of the s3 bucket, then + returns the populated dictionary """ - + existing_files = {} - + try: response = client.list_objects_v2(Bucket=bucket_name) - - if 'Contents' in response: - for obj in response['Contents']: - s3_key = obj['Key'] + + if "Contents" in response: + for obj in response["Contents"]: + s3_key = obj["Key"] try: file_obj = client.get_object(Bucket=bucket_name, Key=s3_key) - file_content = file_obj['Body'].read().decode('utf-8') + file_content = file_obj["Body"].read().decode("utf-8") existing_files[s3_key] = file_content except ClientError as e: - logger.error(f'Error retrieving S3 object {s3_key}: {e}') + logger.error(f"Error retrieving S3 object {s3_key}: {e}") else: - logger.error('The bucket is empty') - + logger.error("The bucket is empty") + except ClientError as e: - logger.error(f'Error listing S3 objects: {e}') - - return existing_files + logger.error(f"Error listing S3 objects: {e}") + return existing_files -def process_and_upload_tables(db, existing_files, client=boto3.client('s3')): - """Creates a list of the tables from a database query and - then selects everything from each table in individual queries - it then writes each table to CSV files and compares with the item - in the existing_files dictionary with the same name. If it finds any changes - to files, or new tables/files it uploads them to the s3 bucket +def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): + """Creates a list of the tables from a database query and + then selects everything from each table in individual queries + it then writes each table to CSV files and compares with the item + in the existing_files dictionary with the same name. If it finds any changes + to files, or new tables/files it uploads them to the s3 bucket """ - ## NEW CODE + # NEW CODE all_datetimes = [] for file_names in existing_files.keys(): - datetime_str_on_s3 = ''.join(re.search(r'\/(.+/).+_(.+)\.csv',file_names).group(1,2)) - all_datetimes.append(datetime.strptime(datetime_str_on_s3, '%Y/%m/%d/%H:%M:%S')) + datetime_str_on_s3 = "".join( + re.search(r"\/(.+/).+_(.+)\.csv", file_names).group(1, 2) + ) + all_datetimes.append(datetime.strptime(datetime_str_on_s3, "%Y/%m/%d/%H:%M:%S")) latest_timestamp = max(all_datetimes) - ## END OF NEW CODE + # END OF NEW CODE - tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';") + tables = db.run( + "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';" + ) print(tables) for table in tables: table_name = table[0] - rows = db.run(f"SELECT * FROM {table_name} WHERE last_updated >= {datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')};") + rows = db.run( + f"SELECT * FROM {table_name} WHERE last_updated >= {datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')};" + ) if rows: csv_file_path = f"/tmp/{table_name}.csv" - with open(csv_file_path, "w", newline='') as file: + with open(csv_file_path, "w", newline="") as file: writer = csv.writer(file) - #column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] - column_names = [col_name[0] for col_name in db.run(f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';")] + # column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] + column_names = [ + col_name[0] + for col_name in db.run( + f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';" + ) + ] writer.writerow(column_names) writer.writerows(rows) - s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') + s3_key = datetime.strftime( + datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv" + ) try: client.upload_file(csv_file_path, extract_bucket(), s3_key) logger.info(f"Uploaded {s3_key} to S3.") except ClientError as e: - logger.error(f'Error uploading to S3: {e}') + logger.error(f"Error uploading to S3: {e}") else: logger.info(f"No new data.") - \ No newline at end of file -- cgit v1.2.3 From 0727dab70cb56521b73c04ab8e378b7f165fc224 Mon Sep 17 00:00:00 2001 From: T-Aji Date: Fri, 16 Aug 2024 14:07:05 +0100 Subject: test: passing lambda_handler both no_changes and with changes to files --- tests/test_extract_lambda.py | 46 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index e94a8a4..4b61b83 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -3,9 +3,10 @@ import boto3 from moto import mock_aws from unittest.mock import patch, MagicMock from unittest import TestCase -from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables +from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, lambda_handler, process_and_upload_tables import os import logging +import json @pytest.fixture(scope='class') def mock_config(): @@ -33,6 +34,49 @@ def s3_client(aws_credentials): with mock_aws(): yield boto3.client('s3') +class TestLambdaHandler: + def test_lambda_handler_files_processed_and_uploaded_successfully(self, mocker): + mock_db = MagicMock() + mock_db.run.side_effect = [ + [['Fruits']], + [['Vegetable', 'Sour', 'Green'], ['Berry', 'Sweet', 'Red']], + [['Food_type'], ['Flavour'], ['Colour']] + ] + mock_db.columns.return_value = [{'name': 'Food_type'}, {'name': 'Flavour'}, {'name': 'Colour'}] + with patch("src.extract_lambda.connect_to_database", return_value=mock_db): + mock_process_and_upload_tables = mocker.patch("src.extract_lambda.process_and_upload_tables", return_value=mock_db) + mock_list_existing_s3_files = mocker.patch("src.extract_lambda.list_existing_s3_files", return_value={}) + event = {} + context = {} + response = lambda_handler(event, context) + assert response['statusCode'] == 200 + assert json.loads(response['body']) == 'CSV files processed and uploaded successfully.' + mock_list_existing_s3_files.assert_called_once() + mock_process_and_upload_tables.assert_called_once_with(mock_db, {}) + mock_db.close.assert_called_once() + + def test_lambda_handler_no_changes_detected_no_files_uploaded(self, mocker): + mock_db = MagicMock() + mock_db.run.side_effect = [ + [['Fruits']], + [['Vegetable', 'Sour', 'Green'], ['Berry', 'Sweet', 'Red']], + [['Food_type'], ['Flavour'], ['Colour']] + ] + mock_db.columns.return_value = [{'name': 'Food_type'}, {'name': 'Flavour'}, {'name': 'Colour'}] + + with patch("src.extract_lambda.connect_to_database", return_value=mock_db): + mock_process_and_upload_tables = mocker.patch("src.extract_lambda.process_and_upload_tables", return_value=False) + mock_list_existing_s3_files = mocker.patch("src.extract_lambda.list_existing_s3_files", return_value={}) + event = {} + context = {} + response = lambda_handler(event, context) + assert response['statusCode'] == 200 + assert json.loads(response['body']) == 'No changes detected, no CSV files were uploaded.' + mock_list_existing_s3_files.assert_called_once() + mock_process_and_upload_tables.assert_called_once_with(mock_db, {}) + mock_db.close.assert_called_once() + + class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): -- cgit v1.2.3 From e97ab6b46f181db107b7a640f386f5f57480347c Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Fri, 16 Aug 2024 14:16:03 +0100 Subject: add makefile in root: not in use currently --- Makefile | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..077cd98 --- /dev/null +++ b/Makefile @@ -0,0 +1,80 @@ +############################################## +# # +# MAKEFILE TO BUILD THE PROJECT # +# # +############################################## + +PROJECT_NAME = de-project-bentley +REGION = eu-west-2 +PYTHON_INTERPRETER = python +WD=$(shell pwd) +PYTHONPATH=${WD} +SHELL := /bin/bash +PROFILE = default +PIP:=pip + +## PYTHON INTERPRETER ENVIRONMENT +create-environment: + @echo ">>> About to create environment: $(PROJECT_NAME)..." + @echo ">>> check python3 version" + ( \ + $(PYTHON_INTERPRETER) --version; \ + ) + @echo ">>> Setting up VirtualEnv." + ( \ + $(PIP) install -q virtualenv virtualenvwrapper; \ + virtualenv venv --python=$(PYTHON_INTERPRETER); \ + ) + +ACTIVATE_ENV := source venv/bin/activate + +# Execute python related functionalities from within the project's environment +define execute_in_env + $(ACTIVATE_ENV) && $1 +endef + +## Build the environment requirements +requirements: create-environment + $(call execute_in_env, $(PIP) install -r ./requirements.txt) + +# Set Up +## Install bandit +bandit: + $(call execute_in_env, $(PIP) install bandit) + +## Install safety +safety: + $(call execute_in_env, $(PIP) install safety) + +## Install black +black: + $(call execute_in_env, $(PIP) install black) + +## Install coverage +coverage: + $(call execute_in_env, $(PIP) install coverage) + +## Set up dev requirements (bandit, safety, black) +dev-setup: bandit safety black coverage + +# Build / Run + +## Run the security test (bandit + safety) +security-test: + $(call execute_in_env, safety check -r ./requirements.txt) + $(call execute_in_env, bandit -lll */*.py *c/*/*.py) + +## Run the black code check +run-black: + $(call execute_in_env, black ./src/*/*.py ./test/*/*.py) + +## Run the unit tests +unit-test: + $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest -v) + +## Run the coverage check +check-coverage: + $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest --cov=src test/) + +## Run all checks +run-checks: security-test run-black unit-test check-coverage -- cgit v1.2.3 From 2bcedc300f36760b55f0db8cfb4e724362d1c251 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 14:27:41 +0100 Subject: chore(ci): remove redundant on-commit.yml --- .github/workflows/on-commit.yml | 50 ----------------------------------------- 1 file changed, 50 deletions(-) delete mode 100644 .github/workflows/on-commit.yml diff --git a/.github/workflows/on-commit.yml b/.github/workflows/on-commit.yml deleted file mode 100644 index fd9ffb8..0000000 --- a/.github/workflows/on-commit.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: commit-qc-checks - -on: - push: - branches-ignore: - - 'main' - -jobs: - python-quality-checks: - runs-on: ubuntu-latest - steps: - - uses : actions/checkout@v4 - - name : 'Python: Setup' - uses : actions/setup-python@v5 - with: - python-version: 3.11 - - name : 'Python: Install Dependencies' - run: | - python -m pip install --upgrade pip - pip install flake8 pylint black bandit safety - continue-on-error: true - - name : 'Python: Linting' - run: | - flake8 . - find . -name "*.py" | xargs pylint - continue-on-error: true - - name : 'Python: Formatting' - run: | - black --check . - continue-on-error: true - terraform-quality-checks: - runs-on: ubuntu-latest - steps: - - uses : actions/checkout@v4 - - name: 'Terraform: Setup' - uses: hashicorp/setup-terraform@v3 - with: - terraform_version: latest - - name: 'Terraform: Formatting' - working-directory: terraform - run: terraform fmt -check -recursive - continue-on-error: true - - name: 'Terraform: Initialise' - working-directory: terraform - run: terraform init -backend=false - continue-on-error: true - - name: 'Terraform: Validate' - working-directory: terraform - run: terraform validate - continue-on-error: true \ No newline at end of file -- cgit v1.2.3 From cf3d366e730e88ceea194d5b3b1d1a3ddecdd944 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 14:30:07 +0100 Subject: ci: deploy only on push/pr to main --- .github/workflows/deploy.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index bd9df57..db51d20 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -1,10 +1,13 @@ name: deploy-terraform on: - push: + pull-request: branches: - - ci-cd-branch # Adjust the branch based on our deployment strategy - + - main + pull: + branches: + - main + jobs: deploy-terraform: @@ -36,4 +39,4 @@ jobs: - name: Terraform Apply working-directory: terraform - run: terraform apply --auto-approve \ No newline at end of file + run: terraform apply --auto-approve -- cgit v1.2.3 From 63b5f3e5f1888d5653d2f7b3529b3d72e3315dbf Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 14:43:46 +0100 Subject: fix(ci): amend pull_request syntax --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index db51d20..00c7263 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -1,7 +1,7 @@ name: deploy-terraform on: - pull-request: + pull_request: branches: - main pull: -- cgit v1.2.3 From 9cec304b2f8c2832c4a715bba784a34f7c674c19 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 14:52:35 +0100 Subject: fix(ci): amend pull to push --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 00c7263..5672048 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -4,7 +4,7 @@ on: pull_request: branches: - main - pull: + push: branches: - main -- cgit v1.2.3 From aba65e0db08625c1ef0d3db6076b54e56e0b45ea Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Fri, 16 Aug 2024 14:20:39 +0100 Subject: refactor following github actions major risk message --- src/extract_lambda.py | 65 ++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 323d04a..cc09e87 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -30,8 +30,8 @@ def lambda_handler(event, context): db = connect_to_database() existing_files = list_existing_s3_files() any_changes = process_and_upload_tables(db, existing_files) - - if not any_changes: + + if not any_changes['updated']: logger.info("No changes detected in the database.") return { "statusCode": 200, @@ -39,8 +39,9 @@ def lambda_handler(event, context): } else: return { - "statusCode": 200, - "body": json.dumps("CSV files processed and uploaded successfully."), + 'statusCode': 200, + 'body': json.dumps(f"""CSV files processed for {', '.join(any_changes['updated'])} and uploaded successfully.{ + 'The following tables were not updated: '+', '.join(any_changes['no change']) if any_changes['no change'] else ''}""") } except Exception as e: logger.error(f"Error: {e}") @@ -124,7 +125,8 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): in the existing_files dictionary with the same name. If it finds any changes to files, or new tables/files it uploads them to the s3 bucket """ - # NEW CODE + load_status = {'updated':[],'no change':[]} + ## Retrieving the latest file timestamp from S3 extract bucket all_datetimes = [] for file_names in existing_files.keys(): datetime_str_on_s3 = "".join( @@ -132,39 +134,34 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): ) all_datetimes.append(datetime.strptime(datetime_str_on_s3, "%Y/%m/%d/%H:%M:%S")) latest_timestamp = max(all_datetimes) - # END OF NEW CODE - tables = db.run( - "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';" - ) - print(tables) + ## Iterating through tables on the database and retrieving only latest changes vs previous file load + tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';") for table in tables: table_name = table[0] rows = db.run( f"SELECT * FROM {table_name} WHERE last_updated >= {datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')};" ) - if rows: - csv_file_path = f"/tmp/{table_name}.csv" - with open(csv_file_path, "w", newline="") as file: - writer = csv.writer(file) - # column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] - column_names = [ - col_name[0] - for col_name in db.run( - f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';" - ) - ] - writer.writerow(column_names) - writer.writerows(rows) - s3_key = datetime.strftime( - datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv" - ) - - try: - client.upload_file(csv_file_path, extract_bucket(), s3_key) - logger.info(f"Uploaded {s3_key} to S3.") - except ClientError as e: - logger.error(f"Error uploading to S3: {e}") - else: - logger.info(f"No new data.") + ## Creating a temporary file path and writing the column name to it followed by each row of data + if rows: + csv_file_path = f"/tmp/{table_name}.csv" + with open(csv_file_path, "w", newline='') as file: + writer = csv.writer(file) + #column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] + column_names = [col_name[0] for col_name in db.run(f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';")] + writer.writerow(column_names) + writer.writerows(rows) + s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') + + ## Writing the new file to S3 extract bucket: + try: + client.upload_file(csv_file_path, extract_bucket(), s3_key) + load_status['updated'].append(table_name) + logger.info(f"Uploaded {s3_key} to S3.") + except ClientError as e: + logger.error(f'Error uploading to S3: {e}') + else: + load_status['no change'].append(table_name) + logger.info(f"No new data in {table_name} name. Latest data retrieved is from {latest_timestamp}.") + return load_status -- cgit v1.2.3 From 4428b8d9e8903e93ca2efd9f95cea9205bf303a9 Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Fri, 16 Aug 2024 14:42:15 +0100 Subject: refactoring to be more in line with pythonic code practices and prevent sql injection --- src/extract_lambda.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index cc09e87..d1a5c7c 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -1,4 +1,4 @@ -from pg8000.native import Connection, InterfaceError +from pg8000.native import Connection, InterfaceError, identifier import boto3 import csv from botocore.exceptions import ClientError @@ -136,12 +136,15 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): latest_timestamp = max(all_datetimes) ## Iterating through tables on the database and retrieving only latest changes vs previous file load - tables = db.run("SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';") + tables = db.run(""" + SELECT table_name + FROM information_schema.tables + WHERE table_schema='public' AND table_type='BASE TABLE';""") for table in tables: table_name = table[0] - rows = db.run( - f"SELECT * FROM {table_name} WHERE last_updated >= {datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')};" - ) + rows = db.run(f"SELECT * FROM {identifier(table_name)} " + "WHERE last_updated >= :latest;", + latest={datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')}) ## Creating a temporary file path and writing the column name to it followed by each row of data if rows: @@ -149,7 +152,9 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): with open(csv_file_path, "w", newline='') as file: writer = csv.writer(file) #column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] - column_names = [col_name[0] for col_name in db.run(f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = '{table_name}';")] + column_names = [col_name[0] for col_name in + db.run("""SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS + WHERE table_name = :table ;""", table=table_name)] writer.writerow(column_names) writer.writerows(rows) s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') -- cgit v1.2.3 From e153f2072eafca2c83a84e2c4210c46a40dabaf4 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Fri, 16 Aug 2024 14:36:15 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in 4428b8d according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/47 --- src/extract_lambda.py | 66 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index d1a5c7c..9a0e509 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -30,8 +30,8 @@ def lambda_handler(event, context): db = connect_to_database() existing_files = list_existing_s3_files() any_changes = process_and_upload_tables(db, existing_files) - - if not any_changes['updated']: + + if not any_changes["updated"]: logger.info("No changes detected in the database.") return { "statusCode": 200, @@ -39,9 +39,11 @@ def lambda_handler(event, context): } else: return { - 'statusCode': 200, - 'body': json.dumps(f"""CSV files processed for {', '.join(any_changes['updated'])} and uploaded successfully.{ - 'The following tables were not updated: '+', '.join(any_changes['no change']) if any_changes['no change'] else ''}""") + "statusCode": 200, + "body": json.dumps( + f"""CSV files processed for {', '.join(any_changes['updated'])} and uploaded successfully.{ + 'The following tables were not updated: '+', '.join(any_changes['no change']) if any_changes['no change'] else ''}""" + ), } except Exception as e: logger.error(f"Error: {e}") @@ -125,8 +127,8 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): in the existing_files dictionary with the same name. If it finds any changes to files, or new tables/files it uploads them to the s3 bucket """ - load_status = {'updated':[],'no change':[]} - ## Retrieving the latest file timestamp from S3 extract bucket + load_status = {"updated": [], "no change": []} + # Retrieving the latest file timestamp from S3 extract bucket all_datetimes = [] for file_names in existing_files.keys(): datetime_str_on_s3 = "".join( @@ -135,38 +137,50 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): all_datetimes.append(datetime.strptime(datetime_str_on_s3, "%Y/%m/%d/%H:%M:%S")) latest_timestamp = max(all_datetimes) - ## Iterating through tables on the database and retrieving only latest changes vs previous file load - tables = db.run(""" + # Iterating through tables on the database and retrieving only latest changes vs previous file load + tables = db.run( + """ SELECT table_name FROM information_schema.tables - WHERE table_schema='public' AND table_type='BASE TABLE';""") + WHERE table_schema='public' AND table_type='BASE TABLE';""" + ) for table in tables: table_name = table[0] - rows = db.run(f"SELECT * FROM {identifier(table_name)} " - "WHERE last_updated >= :latest;", - latest={datetime.strftime(latest_timestamp,'%H-%m-%d %H:%M:%S')}) + rows = db.run( + f"SELECT * FROM {identifier(table_name)} " "WHERE last_updated >= :latest;", + latest={datetime.strftime(latest_timestamp, "%H-%m-%d %H:%M:%S")}, + ) - ## Creating a temporary file path and writing the column name to it followed by each row of data + # Creating a temporary file path and writing the column name to it followed by each row of data if rows: csv_file_path = f"/tmp/{table_name}.csv" - with open(csv_file_path, "w", newline='') as file: + with open(csv_file_path, "w", newline="") as file: writer = csv.writer(file) - #column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] - column_names = [col_name[0] for col_name in - db.run("""SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS - WHERE table_name = :table ;""", table=table_name)] + # column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] + column_names = [ + col_name[0] + for col_name in db.run( + """SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS + WHERE table_name = :table ;""", + table=table_name, + ) + ] writer.writerow(column_names) writer.writerows(rows) - s3_key = datetime.strftime(datetime.today(),f'{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv') + s3_key = datetime.strftime( + datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv" + ) - ## Writing the new file to S3 extract bucket: + # Writing the new file to S3 extract bucket: try: client.upload_file(csv_file_path, extract_bucket(), s3_key) - load_status['updated'].append(table_name) + load_status["updated"].append(table_name) logger.info(f"Uploaded {s3_key} to S3.") except ClientError as e: - logger.error(f'Error uploading to S3: {e}') + logger.error(f"Error uploading to S3: {e}") else: - load_status['no change'].append(table_name) - logger.info(f"No new data in {table_name} name. Latest data retrieved is from {latest_timestamp}.") - return load_status + load_status["no change"].append(table_name) + logger.info( + f"No new data in {table_name} name. Latest data retrieved is from {latest_timestamp}." + ) + return load_status -- cgit v1.2.3 From 890ca0434ce5f7c9e7bdba1482a86cd63a4ef8f9 Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Fri, 16 Aug 2024 15:45:03 +0100 Subject: dummy comment to test checks --- src/extract_lambda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 9a0e509..30c7005 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -11,7 +11,7 @@ import re logger = logging.getLogger() logger.setLevel(logging.INFO) - +## DB Exception class class DBConnectionException(Exception): """Wraps pg8000.native Error or DatabaseError.""" -- cgit v1.2.3 From 653cb35e50b339356274ff03c0d75ac3babf927f Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Fri, 16 Aug 2024 14:45:16 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in 890ca04 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/47 --- src/extract_lambda.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 30c7005..4168e27 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -11,7 +11,9 @@ import re logger = logging.getLogger() logger.setLevel(logging.INFO) -## DB Exception class +# DB Exception class + + class DBConnectionException(Exception): """Wraps pg8000.native Error or DatabaseError.""" -- cgit v1.2.3 From 39a33cecb5e19f15bed4a099b02bdba56c80c073 Mon Sep 17 00:00:00 2001 From: HastarTara Date: Fri, 16 Aug 2024 16:05:03 +0100 Subject: infra[tf] update lambda extract doesnt work yet --- terraform/lambda.tf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 658b8c8..71ddd11 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -83,7 +83,7 @@ resource "aws_lambda_function" "load_lambda" { } locals { - layer_dir = "${path.module}/../python" + layer_dir = "${path.module}/.." requirements = "${path.module}/../requirements.txt" layer_zip = "${path.module}/../layer.zip" } @@ -94,9 +94,9 @@ resource "null_resource" "prepare_layer" { } provisioner "local-exec" { command = < Date: Fri, 16 Aug 2024 16:23:56 +0100 Subject: infra(tf): add version constraints for null and archive --- terraform/main.tf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/terraform/main.tf b/terraform/main.tf index 3b06701..310a251 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -4,6 +4,14 @@ terraform { source = "hashicorp/aws" version = "~>5.0" } + null = { + source = "hashicorp/null" + version = "~>3.2.2" + } + archive = { + source = "hashicorp/archive" + version = "~>2.5.0" + } } backend "s3" { bucket = "bentley-project-secrets" -- cgit v1.2.3 From 303725f83cf5551b3d165aa02ce81562de488a01 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 16:24:44 +0100 Subject: infra(tf): re-add code that creates layer zip --- terraform/lambda.tf | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 71ddd11..67fd6eb 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -83,9 +83,9 @@ resource "aws_lambda_function" "load_lambda" { } locals { - layer_dir = "${path.module}/.." - requirements = "${path.module}/../requirements.txt" - layer_zip = "${path.module}/../layer.zip" + layer_dir = "${path.module}/.." + requirements = "${path.module}/../requirements.txt" + layer_zip = "${path.module}/../layer.zip" } resource "null_resource" "prepare_layer" { @@ -96,23 +96,23 @@ resource "null_resource" "prepare_layer" { command = < Date: Fri, 16 Aug 2024 16:28:04 +0100 Subject: chore(tf): remove dummy username/password sorry hackers! --- terraform/rds.tf | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/terraform/rds.tf b/terraform/rds.tf index d1b4959..a013fb3 100644 --- a/terraform/rds.tf +++ b/terraform/rds.tf @@ -60,18 +60,8 @@ # allocated_storage = 5 # engine = "postgres" # engine_version = "14.10" -# username = "totes" -# password = "totes123" -# # username = "user credentials for the root user" # we could use .env here -# # password = "user password for the root user" # we could use .env here -# ### alternatively to providing username nad password we can specify: -# # resource "aws_kms_key" "example_key" { -# # description = "Example KMS Key" -# # } -# # within the resource: -# # manage_master_user_password = true -# # master_user_secret_kms_key_id = aws_kms_key.example.key_id -# # } +# username = "" +# password = "" # db_subnet_group_name = aws_db_subnet_group.Terrific-Totes-sub-gr.name # vpc_security_group_ids = [aws_security_group.rds.id] # parameter_group_name = aws_db_parameter_group.Terrific-Totes-param-gr.name -- cgit v1.2.3 From 1e27974ecc48d8611b87af1b9cd51e29afa8c792 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Fri, 16 Aug 2024 17:15:59 +0100 Subject: test(fx): fix prepare_layer - broken --- terraform/lambda.tf | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 67fd6eb..27e6266 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -89,14 +89,13 @@ locals { } resource "null_resource" "prepare_layer" { - triggers = { - requirements_hash = filesha1(local.requirements) - } provisioner "local-exec" { command = < Date: Fri, 16 Aug 2024 21:06:51 +0100 Subject: docs: add badges to README --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 6bc75dc..cbb446c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,13 @@ # ToteSys - Data Engineering Project +[![Python](https://img.shields.io/badge/Python-FFD43B?style=for-the-badge&logo=python&logoColor=blue)](https://www.python.org/) +[![AWS](https://img.shields.io/badge/Amazon_AWS-FF9900?style=for-the-badge&logo=amazonaws&logoColor=white)](https://aws.amazon.com/) +[![Terraform](https://img.shields.io/badge/Terraform-7B42BC?style=for-the-badge&logo=terraform&logoColor=white)](https://www.terraform.io/) +[![Postgresql](https://img.shields.io/badge/PostgreSQL-316192?style=for-the-badge&logo=postgresql&logoColor=white)](https://www.postgresql.org/) +[![GitHub Actions](https://img.shields.io/badge/GitHub_Actions-2088FF?style=for-the-badge&logo=github-actions&logoColor=white)](https://github.com/features/actions) + +[![Terraform Main Deployment Workflow Status](https://img.shields.io/github/actions/workflow/status/ajschofield/de-project-bentley/deploy.yml?branch=main&style=flat-square&label=deploy)](https://github.com/ajschofield/de-project-bentley/actions/workflows/deploy.yml?query=branch%3Amain) +[![Production Environment Status](https://img.shields.io/github/deployments/ajschofield/de-project-bentley/production?style=flat-square&label=env)](https://github.com/ajschofield/de-project-bentley/deployments/production) # Summary The project aims to implement a data platform that can extract data from an operational database, archive it in a data lake, and make it easily accessible -- cgit v1.2.3 From 95ad71be4315f5ae3f9183f66049ae8b8cf914fc Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Fri, 16 Aug 2024 20:07:43 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in 9dabc89 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/52 --- src/load_lambda.py | 2 +- src/secrets_manager.py | 31 +++++++++---------- src/transform_lambda.py | 2 +- test/test_secrets_manager.py | 19 +++++++----- tests/test_extract_lambda.py | 69 ++++++++++++++++++++++++------------------- tests/test_secrets_manager.py | 37 +++++++++++++++-------- 6 files changed, 93 insertions(+), 67 deletions(-) diff --git a/src/load_lambda.py b/src/load_lambda.py index 6ee681f..c6a8e60 100644 --- a/src/load_lambda.py +++ b/src/load_lambda.py @@ -1,2 +1,2 @@ def lambda_handler(): - pass \ No newline at end of file + pass diff --git a/src/secrets_manager.py b/src/secrets_manager.py index c0fb61e..3484688 100644 --- a/src/secrets_manager.py +++ b/src/secrets_manager.py @@ -4,45 +4,46 @@ import json def sm_client(): - sm_client = boto3.client('secretsmanager') + sm_client = boto3.client("secretsmanager") yield sm_client -def create_secret(sm_client, secret_name, cohort_id, user, password, host, database, port): + +def create_secret( + sm_client, secret_name, cohort_id, user, password, host, database, port +): secret = { "cohort_id": cohort_id, "user": user, "password": password, "host": host, "database": database, - "port": port + "port": port, } response = sm_client.create_secret( - Name = secret_name, - SecretString = json.dumps(secret) + Name=secret_name, SecretString=json.dumps(secret) ) print(response) return response + def list_secret(sm_client): response = sm_client.list_secrets() - secret_dict = response['SecretList'] + secret_dict = response["SecretList"] secret_names = [] for items in secret_dict: - secret_names.append(items['Name']) - print(f'{len(secret_names)} secret(s) available') + secret_names.append(items["Name"]) + print(f"{len(secret_names)} secret(s) available") for name in secret_names: print(name) return secret_names -def retrieve_secrets(sm_client): - response = sm_client.get_secrets( - - ) +def retrieve_secrets(sm_client): + response = sm_client.get_secrets() -#retrieve secret -#so lambda can access totesy db -#so lambda connect to the db and then retrieve the data \ No newline at end of file +# retrieve secret +# so lambda can access totesy db +# so lambda connect to the db and then retrieve the data diff --git a/src/transform_lambda.py b/src/transform_lambda.py index 6ee681f..c6a8e60 100644 --- a/src/transform_lambda.py +++ b/src/transform_lambda.py @@ -1,2 +1,2 @@ def lambda_handler(): - pass \ No newline at end of file + pass diff --git a/test/test_secrets_manager.py b/test/test_secrets_manager.py index 86533bc..cb4ec15 100644 --- a/test/test_secrets_manager.py +++ b/test/test_secrets_manager.py @@ -2,10 +2,12 @@ from src.secrets_manager import sm_client, create_secret, list_secret import boto3 from moto import mock_aws import json -import pytest +import pytest import os -pytest.fixture(scope='class') +pytest.fixture(scope="class") + + def mock_aws_credentials(): """Mocked AWS Credentials for moto.""" os.environ["AWS_ACCESS_KEY_ID"] = "testing" @@ -14,10 +16,11 @@ def mock_aws_credentials(): os.environ["AWS_SESSION_TOKEN"] = "testing" os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" -@pytest.fixture(scope='class') + +@pytest.fixture(scope="class") def mock_sm_client(mock_aws_credentials): with mock_aws(): - yield boto3.client('secretsmanager') + yield boto3.client("secretsmanager") def test_create_secret_stores_secrets(mock_sm_client): @@ -29,6 +32,8 @@ def test_create_secret_stores_secrets(mock_sm_client): port = "test_port" secret_name = "test_secret" - response = create_secret(mock_sm_client, secret_name, cohort_id, user, password, host, database, port) - - assert response['Name'] == secret_name \ No newline at end of file + response = create_secret( + mock_sm_client, secret_name, cohort_id, user, password, host, database, port + ) + + assert response["Name"] == secret_name diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index e94a8a4..877e36a 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -3,11 +3,17 @@ import boto3 from moto import mock_aws from unittest.mock import patch, MagicMock from unittest import TestCase -from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables -import os +from src.extract_lambda import ( + list_existing_s3_files, + connect_to_database, + DBConnectionException, + process_and_upload_tables, +) +import os import logging -@pytest.fixture(scope='class') + +@pytest.fixture(scope="class") def mock_config(): env_vars = { "host": "abc", @@ -20,54 +26,55 @@ def mock_config(): yield mock_config -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def aws_credentials(): - os.environ["AWS_ACCESS_KEY_ID"] = 'testing' - os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing' - os.environ["AWS_SECURIT_TOKEN"] = 'testing' - os.environ["AWS_SESSION_TOKEN"] = 'testing' - os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2' + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURIT_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" + -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def s3_client(aws_credentials): with mock_aws(): - yield boto3.client('s3') + yield boto3.client("s3") + class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): - logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) list_existing_s3_files(client=s3_client) - assert 'Error listing S3 objects' in caplog.text + assert "Error listing S3 objects" in caplog.text def test_error_if_bucket_is_empty(self, s3_client, caplog): - - s3_client.create_bucket(Bucket='extract_bucket', - CreateBucketConfiguration={ - 'LocationConstraint': 'eu-west-2' - }) + s3_client.create_bucket( + Bucket="extract_bucket", + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) list_existing_s3_files(client=s3_client) - assert 'The bucket is empty' in caplog.text + assert "The bucket is empty" in caplog.text def test_error_retrieving_object(self, s3_client, caplog): - s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt') - list_existing_s3_files(bucket_name='test_bucket', client=s3_client) + s3_client.upload_file("tests/dummy.txt", "extract_bucket", "dummy.txt") + list_existing_s3_files(bucket_name="test_bucket", client=s3_client) - assert 'Error retrieving S3 object ' in caplog.text + assert "Error retrieving S3 object " in caplog.text def test_retrieves_file_content(self, s3_client, caplog): result = list_existing_s3_files(client=s3_client) - assert list(result.values()) == ['This is a test file.'] + assert list(result.values()) == ["This is a test file."] + class TestConnectToDatabase: def test_connect_to_database(mock_conn, mock_config): - with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: + with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: connect_to_database() mock_conn.assert_called_with( - host="abc", user="def", port="5432", password="password", database="db" + host="abc", user="def", port="5432", password="password", database="db" ) def test_database_error(self, mock_config): @@ -76,12 +83,14 @@ class TestConnectToDatabase: def test_logs_interface_error(self, caplog): logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) with pytest.raises(DBConnectionException): connect_to_database() - assert 'Interface error' in caplog.text -''' + assert "Interface error" in caplog.text + + +""" class TestProcessAndUploadTables: def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog): logger = logging.getLogger() @@ -106,4 +115,4 @@ class TestProcessAndUploadTables: s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key) process_and_upload_tables(mock_db(), existing_files, client=s3_client) assert 'No new data.' in caplog.text -''' \ No newline at end of file +""" diff --git a/tests/test_secrets_manager.py b/tests/test_secrets_manager.py index a30be86..609c572 100644 --- a/tests/test_secrets_manager.py +++ b/tests/test_secrets_manager.py @@ -3,10 +3,11 @@ import boto3 import botocore.exceptions from moto import mock_aws import json -import pytest +import pytest import os -@pytest.fixture(scope='function') + +@pytest.fixture(scope="function") def aws_credentials(): """Mocked AWS Credentials for moto.""" os.environ["AWS_ACCESS_KEY_ID"] = "testing" @@ -15,12 +16,14 @@ def aws_credentials(): os.environ["AWS_SESSION_TOKEN"] = "testing" os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" -@pytest.fixture(scope='function') + +@pytest.fixture(scope="function") def mock_sm_client(aws_credentials): with mock_aws(): yield boto3.client("secretsmanager") -@pytest.fixture(scope='function') + +@pytest.fixture(scope="function") def mock_store_secret(mock_sm_client): secret = { "cohort_id": "test_cohort_id", @@ -28,15 +31,18 @@ def mock_store_secret(mock_sm_client): "password": "test_password", "host": "test_host", "database": "test_database", - "port": "test_port" + "port": "test_port", } secret_name = "test_secret" - response = mock_sm_client.create_secret(Name=secret_name, SecretString=json.dumps(secret)) + response = mock_sm_client.create_secret( + Name=secret_name, SecretString=json.dumps(secret) + ) return response + def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret): secret_name = "test_secret" @@ -44,8 +50,10 @@ def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret) assert isinstance(result, dict) -def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_store_secret): +def test_retrieves_secrets_returns_correct_keys_and_values( + mock_sm_client, mock_store_secret +): secret_name = "test_secret" result = retrieve_secrets(mock_sm_client, secret_name) @@ -57,17 +65,20 @@ def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_ assert result["database"] == "test_database" assert result["port"] == "test_port" -def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type(mock_sm_client): - secret_name = [1, 2, 3] +def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type( + mock_sm_client, +): + secret_name = [1, 2, 3] with pytest.raises(botocore.exceptions.ParamValidationError) as error: retrieve_secrets(mock_sm_client, secret_name) -def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist(mock_sm_client, mock_store_secret): - secret_name = 'test_secret_2' - +def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist( + mock_sm_client, mock_store_secret +): + secret_name = "test_secret_2" with pytest.raises(botocore.exceptions.ClientError) as error: - retrieve_secrets(mock_sm_client, secret_name) \ No newline at end of file + retrieve_secrets(mock_sm_client, secret_name) -- cgit v1.2.3 From afc889de865e6ce42b19ce89c57e9bfed98d6757 Mon Sep 17 00:00:00 2001 From: T-Aji Date: Mon, 19 Aug 2024 09:27:20 +0100 Subject: test: handler exception test failing --- tests/test_extract_lambda.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index 4b61b83..bc40df1 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -76,6 +76,17 @@ class TestLambdaHandler: mock_process_and_upload_tables.assert_called_once_with(mock_db, {}) mock_db.close.assert_called_once() + def test_lambda_handler_exception_error(self, mocker): + with patch("src.extract_lambda.connect_to_database", side_effect=Exception("Database connection error")): + mock_process_and_upload_tables = mocker.patch("src.extract_lambda.process_and_upload_tables") + mock_list_existing_s3_files = mocker.patch("src.extract_lambda.list_existing_s3_files") + event = {} + context = {} + response = lambda_handler(event, context) + assert response['statusCode'] == 500 + assert json.loads(response['body']) == 'Internal server error.' + mock_list_existing_s3_files.assert_not_called() + mock_process_and_upload_tables.assert_not_called() class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): -- cgit v1.2.3 From dd536c3209fc37423af4219a941c006bdb6b3c4f Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Mon, 19 Aug 2024 10:32:57 +0100 Subject: deleted the test folder --- test/test_secrets_manager.py | 34 ---------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 test/test_secrets_manager.py diff --git a/test/test_secrets_manager.py b/test/test_secrets_manager.py deleted file mode 100644 index 86533bc..0000000 --- a/test/test_secrets_manager.py +++ /dev/null @@ -1,34 +0,0 @@ -from src.secrets_manager import sm_client, create_secret, list_secret -import boto3 -from moto import mock_aws -import json -import pytest -import os - -pytest.fixture(scope='class') -def mock_aws_credentials(): - """Mocked AWS Credentials for moto.""" - os.environ["AWS_ACCESS_KEY_ID"] = "testing" - os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" - os.environ["AWS_SECURITY_TOKEN"] = "testing" - os.environ["AWS_SESSION_TOKEN"] = "testing" - os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" - -@pytest.fixture(scope='class') -def mock_sm_client(mock_aws_credentials): - with mock_aws(): - yield boto3.client('secretsmanager') - - -def test_create_secret_stores_secrets(mock_sm_client): - cohort_id = "test_cohort_id" - user = "test_user_id" - password = "test_password" - host = "test_host" - database = "test_database" - port = "test_port" - - secret_name = "test_secret" - response = create_secret(mock_sm_client, secret_name, cohort_id, user, password, host, database, port) - - assert response['Name'] == secret_name \ No newline at end of file -- cgit v1.2.3 From 5cc511d2afeea262db0db7039c8f83c123da77ea Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 09:55:43 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in afc889d according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/54 --- tests/test_extract_lambda.py | 144 +++++++++++++++++++++++++++---------------- 1 file changed, 92 insertions(+), 52 deletions(-) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index bc40df1..67cb6d3 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -3,12 +3,19 @@ import boto3 from moto import mock_aws from unittest.mock import patch, MagicMock from unittest import TestCase -from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, lambda_handler, process_and_upload_tables -import os +from src.extract_lambda import ( + list_existing_s3_files, + connect_to_database, + DBConnectionException, + lambda_handler, + process_and_upload_tables, +) +import os import logging import json -@pytest.fixture(scope='class') + +@pytest.fixture(scope="class") def mock_config(): env_vars = { "host": "abc", @@ -21,36 +28,49 @@ def mock_config(): yield mock_config -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def aws_credentials(): - os.environ["AWS_ACCESS_KEY_ID"] = 'testing' - os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing' - os.environ["AWS_SECURIT_TOKEN"] = 'testing' - os.environ["AWS_SESSION_TOKEN"] = 'testing' - os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2' + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURIT_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" + -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def s3_client(aws_credentials): with mock_aws(): - yield boto3.client('s3') + yield boto3.client("s3") + class TestLambdaHandler: def test_lambda_handler_files_processed_and_uploaded_successfully(self, mocker): mock_db = MagicMock() mock_db.run.side_effect = [ - [['Fruits']], - [['Vegetable', 'Sour', 'Green'], ['Berry', 'Sweet', 'Red']], - [['Food_type'], ['Flavour'], ['Colour']] + [["Fruits"]], + [["Vegetable", "Sour", "Green"], ["Berry", "Sweet", "Red"]], + [["Food_type"], ["Flavour"], ["Colour"]], + ] + mock_db.columns.return_value = [ + {"name": "Food_type"}, + {"name": "Flavour"}, + {"name": "Colour"}, ] - mock_db.columns.return_value = [{'name': 'Food_type'}, {'name': 'Flavour'}, {'name': 'Colour'}] with patch("src.extract_lambda.connect_to_database", return_value=mock_db): - mock_process_and_upload_tables = mocker.patch("src.extract_lambda.process_and_upload_tables", return_value=mock_db) - mock_list_existing_s3_files = mocker.patch("src.extract_lambda.list_existing_s3_files", return_value={}) + mock_process_and_upload_tables = mocker.patch( + "src.extract_lambda.process_and_upload_tables", return_value=mock_db + ) + mock_list_existing_s3_files = mocker.patch( + "src.extract_lambda.list_existing_s3_files", return_value={} + ) event = {} context = {} response = lambda_handler(event, context) - assert response['statusCode'] == 200 - assert json.loads(response['body']) == 'CSV files processed and uploaded successfully.' + assert response["statusCode"] == 200 + assert ( + json.loads(response["body"]) + == "CSV files processed and uploaded successfully." + ) mock_list_existing_s3_files.assert_called_once() mock_process_and_upload_tables.assert_called_once_with(mock_db, {}) mock_db.close.assert_called_once() @@ -58,71 +78,89 @@ class TestLambdaHandler: def test_lambda_handler_no_changes_detected_no_files_uploaded(self, mocker): mock_db = MagicMock() mock_db.run.side_effect = [ - [['Fruits']], - [['Vegetable', 'Sour', 'Green'], ['Berry', 'Sweet', 'Red']], - [['Food_type'], ['Flavour'], ['Colour']] + [["Fruits"]], + [["Vegetable", "Sour", "Green"], ["Berry", "Sweet", "Red"]], + [["Food_type"], ["Flavour"], ["Colour"]], + ] + mock_db.columns.return_value = [ + {"name": "Food_type"}, + {"name": "Flavour"}, + {"name": "Colour"}, ] - mock_db.columns.return_value = [{'name': 'Food_type'}, {'name': 'Flavour'}, {'name': 'Colour'}] with patch("src.extract_lambda.connect_to_database", return_value=mock_db): - mock_process_and_upload_tables = mocker.patch("src.extract_lambda.process_and_upload_tables", return_value=False) - mock_list_existing_s3_files = mocker.patch("src.extract_lambda.list_existing_s3_files", return_value={}) + mock_process_and_upload_tables = mocker.patch( + "src.extract_lambda.process_and_upload_tables", return_value=False + ) + mock_list_existing_s3_files = mocker.patch( + "src.extract_lambda.list_existing_s3_files", return_value={} + ) event = {} context = {} response = lambda_handler(event, context) - assert response['statusCode'] == 200 - assert json.loads(response['body']) == 'No changes detected, no CSV files were uploaded.' + assert response["statusCode"] == 200 + assert ( + json.loads(response["body"]) + == "No changes detected, no CSV files were uploaded." + ) mock_list_existing_s3_files.assert_called_once() mock_process_and_upload_tables.assert_called_once_with(mock_db, {}) mock_db.close.assert_called_once() def test_lambda_handler_exception_error(self, mocker): - with patch("src.extract_lambda.connect_to_database", side_effect=Exception("Database connection error")): - mock_process_and_upload_tables = mocker.patch("src.extract_lambda.process_and_upload_tables") - mock_list_existing_s3_files = mocker.patch("src.extract_lambda.list_existing_s3_files") + with patch( + "src.extract_lambda.connect_to_database", + side_effect=Exception("Database connection error"), + ): + mock_process_and_upload_tables = mocker.patch( + "src.extract_lambda.process_and_upload_tables" + ) + mock_list_existing_s3_files = mocker.patch( + "src.extract_lambda.list_existing_s3_files" + ) event = {} context = {} response = lambda_handler(event, context) - assert response['statusCode'] == 500 - assert json.loads(response['body']) == 'Internal server error.' + assert response["statusCode"] == 500 + assert json.loads(response["body"]) == "Internal server error." mock_list_existing_s3_files.assert_not_called() - mock_process_and_upload_tables.assert_not_called() + mock_process_and_upload_tables.assert_not_called() + class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): - logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) list_existing_s3_files(client=s3_client) - assert 'Error listing S3 objects' in caplog.text + assert "Error listing S3 objects" in caplog.text def test_error_if_bucket_is_empty(self, s3_client, caplog): - - s3_client.create_bucket(Bucket='extract_bucket', - CreateBucketConfiguration={ - 'LocationConstraint': 'eu-west-2' - }) + s3_client.create_bucket( + Bucket="extract_bucket", + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) list_existing_s3_files(client=s3_client) - assert 'The bucket is empty' in caplog.text + assert "The bucket is empty" in caplog.text def test_error_retrieving_object(self, s3_client, caplog): - s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt') - list_existing_s3_files(bucket_name='test_bucket', client=s3_client) + s3_client.upload_file("tests/dummy.txt", "extract_bucket", "dummy.txt") + list_existing_s3_files(bucket_name="test_bucket", client=s3_client) - assert 'Error retrieving S3 object ' in caplog.text + assert "Error retrieving S3 object " in caplog.text def test_retrieves_file_content(self, s3_client, caplog): result = list_existing_s3_files(client=s3_client) - assert list(result.values()) == ['This is a test file.'] + assert list(result.values()) == ["This is a test file."] + class TestConnectToDatabase: def test_connect_to_database(mock_conn, mock_config): - with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: + with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: connect_to_database() mock_conn.assert_called_with( - host="abc", user="def", port="5432", password="password", database="db" + host="abc", user="def", port="5432", password="password", database="db" ) def test_database_error(self, mock_config): @@ -131,12 +169,14 @@ class TestConnectToDatabase: def test_logs_interface_error(self, caplog): logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) with pytest.raises(DBConnectionException): connect_to_database() - assert 'Interface error' in caplog.text -''' + assert "Interface error" in caplog.text + + +""" class TestProcessAndUploadTables: def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog): logger = logging.getLogger() @@ -161,4 +201,4 @@ class TestProcessAndUploadTables: s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key) process_and_upload_tables(mock_db(), existing_files, client=s3_client) assert 'No new data.' in caplog.text -''' \ No newline at end of file +""" -- cgit v1.2.3 From e27c6b48897a48f8462b8a0f40deb0ddaf301b63 Mon Sep 17 00:00:00 2001 From: Ang Bel Date: Mon, 19 Aug 2024 11:21:58 +0100 Subject: layers block update, function resources to inlcude attributes: layers, correct handler and source_code_hash --- terraform/lambda.tf | 70 +++++++++++++++++++++++++++++------------------------ terraform/s3.tf | 5 ++++ 2 files changed, 44 insertions(+), 31 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 27e6266..e33bc79 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -12,12 +12,14 @@ resource "aws_s3_object" "extract_lambda_code" { } resource "aws_lambda_function" "extract_lambda" { - function_name = var.extract_lambda_name - s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = aws_s3_object.extract_lambda_code.key - role = aws_iam_role.multi_service_role.arn - handler = "extract_lambda.extract" - runtime = "python3.11" + function_name = var.extract_lambda_name + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.extract_lambda_code.key + layers = [aws_lambda_layer_version.lambda_layer.arn] + role = aws_iam_role.multi_service_role.arn + handler = "extract_lambda.lambda_handler" + runtime = "python3.11" + source_code_hash = data.archive_file.extract_lambda_zip.output_base64sha256 lifecycle { create_before_destroy = true @@ -40,12 +42,14 @@ resource "aws_s3_object" "transform_lambda_code" { } resource "aws_lambda_function" "transform_lambda" { - function_name = var.transform_lambda_name - s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = aws_s3_object.transform_lambda_code.key - role = aws_iam_role.multi_service_role.arn - handler = "transform_lambda.transform" - runtime = "python3.11" + function_name = var.transform_lambda_name + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.transform_lambda_code.key + layers = [aws_lambda_layer_version.lambda_layer.arn] + role = aws_iam_role.multi_service_role.arn + handler = "transform_lambda.lambda_handler" + runtime = "python3.11" + source_code_hash = data.archive_file.transform_lambda_zip.output_base64sha256 lifecycle { create_before_destroy = true @@ -68,12 +72,14 @@ resource "aws_s3_object" "load_lambda_code" { } resource "aws_lambda_function" "load_lambda" { - function_name = var.load_lambda_name - s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = aws_s3_object.load_lambda_code.key - role = aws_iam_role.multi_service_role.arn - handler = "load_lambda.load" - runtime = "python3.11" + function_name = var.load_lambda_name + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.load_lambda_code.key + layers = [aws_lambda_layer_version.lambda_layer.arn] + role = aws_iam_role.multi_service_role.arn + handler = "load_lambda.lambda_handler" + runtime = "python3.11" + source_code_hash = data.archive_file.load_lambda_zip.output_base64sha256 lifecycle { create_before_destroy = true @@ -82,10 +88,12 @@ resource "aws_lambda_function" "load_lambda" { depends_on = [aws_s3_object.load_lambda_code] } +# Lambda Layer Specification locals { - layer_dir = "${path.module}/.." - requirements = "${path.module}/../requirements.txt" - layer_zip = "${path.module}/../layer.zip" + layer_dir = "lambda_layer" + requirements = "requirements.txt" + layer_zip = "layer.zip" + layer_name = "lambda_layer_dev" } resource "null_resource" "prepare_layer" { @@ -95,23 +103,23 @@ resource "null_resource" "prepare_layer" { rm -rf python mkdir python pip3 install -r ${local.requirements} -t python/ - zip -r ${local.layer_zip} python/ - EOT - } + zip -r ${local.layer_zip} python + EOT + } #removed / at the end of python in line 99 } -resource "aws_s3_object" "layer_zip" { - bucket = aws_s3_bucket.lambda_code_bucket.bucket - key = "layer.zip" +resource "aws_s3_object" "lambda_layer_zip" { + bucket = aws_s3_bucket.lambda_code_bucket.id #bucket instead of id + key = "lambda_layer/${local.layer_name}/${local.layer_zip}" source = "${local.layer_dir}/${local.layer_zip}" depends_on = [null_resource.prepare_layer] } resource "aws_lambda_layer_version" "lambda_layer" { - layer_name = "lambda_layer" + layer_name = local.layer_name compatible_runtimes = ["python3.11"] - s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = aws_s3_object.layer_zip.key + s3_bucket = aws_s3_bucket.lambda_layer_bucket.id #bucket instead of id + s3_key = aws_s3_object.lambda_layer_zip.key skip_destroy = true - depends_on = [aws_s3_object.layer_zip] + depends_on = [aws_s3_object.lambda_layer_zip] } diff --git a/terraform/s3.tf b/terraform/s3.tf index d5cdee3..b3a863c 100644 --- a/terraform/s3.tf +++ b/terraform/s3.tf @@ -12,3 +12,8 @@ resource "aws_s3_bucket" "transform_bucket" { resource "aws_s3_bucket" "lambda_code_bucket" { bucket_prefix = "${var.s3_code_bucket_name}-" } + +### LAMBDA LAYER BUCKET +resource "aws_s3_bucket" "lambda_layer_bucket" { + bucket_prefix = "lambda-layer-dev-" +} \ No newline at end of file -- cgit v1.2.3 From 43df5dd9c6bd21f33a7fccbc9b81ad3677637da5 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 10:23:19 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in e27c6b4 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/55 --- src/load_lambda.py | 2 +- src/secrets_manager.py | 31 +++++++++---------- src/transform_lambda.py | 2 +- test/test_secrets_manager.py | 19 +++++++----- tests/test_extract_lambda.py | 69 ++++++++++++++++++++++++------------------- tests/test_secrets_manager.py | 37 +++++++++++++++-------- 6 files changed, 93 insertions(+), 67 deletions(-) diff --git a/src/load_lambda.py b/src/load_lambda.py index 6ee681f..c6a8e60 100644 --- a/src/load_lambda.py +++ b/src/load_lambda.py @@ -1,2 +1,2 @@ def lambda_handler(): - pass \ No newline at end of file + pass diff --git a/src/secrets_manager.py b/src/secrets_manager.py index c0fb61e..3484688 100644 --- a/src/secrets_manager.py +++ b/src/secrets_manager.py @@ -4,45 +4,46 @@ import json def sm_client(): - sm_client = boto3.client('secretsmanager') + sm_client = boto3.client("secretsmanager") yield sm_client -def create_secret(sm_client, secret_name, cohort_id, user, password, host, database, port): + +def create_secret( + sm_client, secret_name, cohort_id, user, password, host, database, port +): secret = { "cohort_id": cohort_id, "user": user, "password": password, "host": host, "database": database, - "port": port + "port": port, } response = sm_client.create_secret( - Name = secret_name, - SecretString = json.dumps(secret) + Name=secret_name, SecretString=json.dumps(secret) ) print(response) return response + def list_secret(sm_client): response = sm_client.list_secrets() - secret_dict = response['SecretList'] + secret_dict = response["SecretList"] secret_names = [] for items in secret_dict: - secret_names.append(items['Name']) - print(f'{len(secret_names)} secret(s) available') + secret_names.append(items["Name"]) + print(f"{len(secret_names)} secret(s) available") for name in secret_names: print(name) return secret_names -def retrieve_secrets(sm_client): - response = sm_client.get_secrets( - - ) +def retrieve_secrets(sm_client): + response = sm_client.get_secrets() -#retrieve secret -#so lambda can access totesy db -#so lambda connect to the db and then retrieve the data \ No newline at end of file +# retrieve secret +# so lambda can access totesy db +# so lambda connect to the db and then retrieve the data diff --git a/src/transform_lambda.py b/src/transform_lambda.py index 6ee681f..c6a8e60 100644 --- a/src/transform_lambda.py +++ b/src/transform_lambda.py @@ -1,2 +1,2 @@ def lambda_handler(): - pass \ No newline at end of file + pass diff --git a/test/test_secrets_manager.py b/test/test_secrets_manager.py index 86533bc..cb4ec15 100644 --- a/test/test_secrets_manager.py +++ b/test/test_secrets_manager.py @@ -2,10 +2,12 @@ from src.secrets_manager import sm_client, create_secret, list_secret import boto3 from moto import mock_aws import json -import pytest +import pytest import os -pytest.fixture(scope='class') +pytest.fixture(scope="class") + + def mock_aws_credentials(): """Mocked AWS Credentials for moto.""" os.environ["AWS_ACCESS_KEY_ID"] = "testing" @@ -14,10 +16,11 @@ def mock_aws_credentials(): os.environ["AWS_SESSION_TOKEN"] = "testing" os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" -@pytest.fixture(scope='class') + +@pytest.fixture(scope="class") def mock_sm_client(mock_aws_credentials): with mock_aws(): - yield boto3.client('secretsmanager') + yield boto3.client("secretsmanager") def test_create_secret_stores_secrets(mock_sm_client): @@ -29,6 +32,8 @@ def test_create_secret_stores_secrets(mock_sm_client): port = "test_port" secret_name = "test_secret" - response = create_secret(mock_sm_client, secret_name, cohort_id, user, password, host, database, port) - - assert response['Name'] == secret_name \ No newline at end of file + response = create_secret( + mock_sm_client, secret_name, cohort_id, user, password, host, database, port + ) + + assert response["Name"] == secret_name diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index e94a8a4..877e36a 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -3,11 +3,17 @@ import boto3 from moto import mock_aws from unittest.mock import patch, MagicMock from unittest import TestCase -from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables -import os +from src.extract_lambda import ( + list_existing_s3_files, + connect_to_database, + DBConnectionException, + process_and_upload_tables, +) +import os import logging -@pytest.fixture(scope='class') + +@pytest.fixture(scope="class") def mock_config(): env_vars = { "host": "abc", @@ -20,54 +26,55 @@ def mock_config(): yield mock_config -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def aws_credentials(): - os.environ["AWS_ACCESS_KEY_ID"] = 'testing' - os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing' - os.environ["AWS_SECURIT_TOKEN"] = 'testing' - os.environ["AWS_SESSION_TOKEN"] = 'testing' - os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2' + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURIT_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" + -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def s3_client(aws_credentials): with mock_aws(): - yield boto3.client('s3') + yield boto3.client("s3") + class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): - logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) list_existing_s3_files(client=s3_client) - assert 'Error listing S3 objects' in caplog.text + assert "Error listing S3 objects" in caplog.text def test_error_if_bucket_is_empty(self, s3_client, caplog): - - s3_client.create_bucket(Bucket='extract_bucket', - CreateBucketConfiguration={ - 'LocationConstraint': 'eu-west-2' - }) + s3_client.create_bucket( + Bucket="extract_bucket", + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) list_existing_s3_files(client=s3_client) - assert 'The bucket is empty' in caplog.text + assert "The bucket is empty" in caplog.text def test_error_retrieving_object(self, s3_client, caplog): - s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt') - list_existing_s3_files(bucket_name='test_bucket', client=s3_client) + s3_client.upload_file("tests/dummy.txt", "extract_bucket", "dummy.txt") + list_existing_s3_files(bucket_name="test_bucket", client=s3_client) - assert 'Error retrieving S3 object ' in caplog.text + assert "Error retrieving S3 object " in caplog.text def test_retrieves_file_content(self, s3_client, caplog): result = list_existing_s3_files(client=s3_client) - assert list(result.values()) == ['This is a test file.'] + assert list(result.values()) == ["This is a test file."] + class TestConnectToDatabase: def test_connect_to_database(mock_conn, mock_config): - with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: + with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: connect_to_database() mock_conn.assert_called_with( - host="abc", user="def", port="5432", password="password", database="db" + host="abc", user="def", port="5432", password="password", database="db" ) def test_database_error(self, mock_config): @@ -76,12 +83,14 @@ class TestConnectToDatabase: def test_logs_interface_error(self, caplog): logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) with pytest.raises(DBConnectionException): connect_to_database() - assert 'Interface error' in caplog.text -''' + assert "Interface error" in caplog.text + + +""" class TestProcessAndUploadTables: def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog): logger = logging.getLogger() @@ -106,4 +115,4 @@ class TestProcessAndUploadTables: s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key) process_and_upload_tables(mock_db(), existing_files, client=s3_client) assert 'No new data.' in caplog.text -''' \ No newline at end of file +""" diff --git a/tests/test_secrets_manager.py b/tests/test_secrets_manager.py index a30be86..609c572 100644 --- a/tests/test_secrets_manager.py +++ b/tests/test_secrets_manager.py @@ -3,10 +3,11 @@ import boto3 import botocore.exceptions from moto import mock_aws import json -import pytest +import pytest import os -@pytest.fixture(scope='function') + +@pytest.fixture(scope="function") def aws_credentials(): """Mocked AWS Credentials for moto.""" os.environ["AWS_ACCESS_KEY_ID"] = "testing" @@ -15,12 +16,14 @@ def aws_credentials(): os.environ["AWS_SESSION_TOKEN"] = "testing" os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" -@pytest.fixture(scope='function') + +@pytest.fixture(scope="function") def mock_sm_client(aws_credentials): with mock_aws(): yield boto3.client("secretsmanager") -@pytest.fixture(scope='function') + +@pytest.fixture(scope="function") def mock_store_secret(mock_sm_client): secret = { "cohort_id": "test_cohort_id", @@ -28,15 +31,18 @@ def mock_store_secret(mock_sm_client): "password": "test_password", "host": "test_host", "database": "test_database", - "port": "test_port" + "port": "test_port", } secret_name = "test_secret" - response = mock_sm_client.create_secret(Name=secret_name, SecretString=json.dumps(secret)) + response = mock_sm_client.create_secret( + Name=secret_name, SecretString=json.dumps(secret) + ) return response + def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret): secret_name = "test_secret" @@ -44,8 +50,10 @@ def test_retrieves_secrets_returns_dictionary(mock_sm_client, mock_store_secret) assert isinstance(result, dict) -def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_store_secret): +def test_retrieves_secrets_returns_correct_keys_and_values( + mock_sm_client, mock_store_secret +): secret_name = "test_secret" result = retrieve_secrets(mock_sm_client, secret_name) @@ -57,17 +65,20 @@ def test_retrieves_secrets_returns_correct_keys_and_values(mock_sm_client, mock_ assert result["database"] == "test_database" assert result["port"] == "test_port" -def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type(mock_sm_client): - secret_name = [1, 2, 3] +def test_retrieves_secrets_raises_error_if_secret_name_incorrect_data_type( + mock_sm_client, +): + secret_name = [1, 2, 3] with pytest.raises(botocore.exceptions.ParamValidationError) as error: retrieve_secrets(mock_sm_client, secret_name) -def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist(mock_sm_client, mock_store_secret): - secret_name = 'test_secret_2' - +def test_retrieves_secrets_raises_error_if_secret_name_does_not_exist( + mock_sm_client, mock_store_secret +): + secret_name = "test_secret_2" with pytest.raises(botocore.exceptions.ClientError) as error: - retrieve_secrets(mock_sm_client, secret_name) \ No newline at end of file + retrieve_secrets(mock_sm_client, secret_name) -- cgit v1.2.3 From 1ea59ed0d92d5bbbd1ffe46ca7a1e296aa55fb1f Mon Sep 17 00:00:00 2001 From: T-Aji Date: Mon, 19 Aug 2024 11:29:45 +0100 Subject: all tests added --- tests/test_extract_lambda.py | 155 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 125 insertions(+), 30 deletions(-) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index e94a8a4..67cb6d3 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -3,11 +3,19 @@ import boto3 from moto import mock_aws from unittest.mock import patch, MagicMock from unittest import TestCase -from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables -import os +from src.extract_lambda import ( + list_existing_s3_files, + connect_to_database, + DBConnectionException, + lambda_handler, + process_and_upload_tables, +) +import os import logging +import json -@pytest.fixture(scope='class') + +@pytest.fixture(scope="class") def mock_config(): env_vars = { "host": "abc", @@ -20,54 +28,139 @@ def mock_config(): yield mock_config -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def aws_credentials(): - os.environ["AWS_ACCESS_KEY_ID"] = 'testing' - os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing' - os.environ["AWS_SECURIT_TOKEN"] = 'testing' - os.environ["AWS_SESSION_TOKEN"] = 'testing' - os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2' + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURIT_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" + -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def s3_client(aws_credentials): with mock_aws(): - yield boto3.client('s3') + yield boto3.client("s3") + + +class TestLambdaHandler: + def test_lambda_handler_files_processed_and_uploaded_successfully(self, mocker): + mock_db = MagicMock() + mock_db.run.side_effect = [ + [["Fruits"]], + [["Vegetable", "Sour", "Green"], ["Berry", "Sweet", "Red"]], + [["Food_type"], ["Flavour"], ["Colour"]], + ] + mock_db.columns.return_value = [ + {"name": "Food_type"}, + {"name": "Flavour"}, + {"name": "Colour"}, + ] + with patch("src.extract_lambda.connect_to_database", return_value=mock_db): + mock_process_and_upload_tables = mocker.patch( + "src.extract_lambda.process_and_upload_tables", return_value=mock_db + ) + mock_list_existing_s3_files = mocker.patch( + "src.extract_lambda.list_existing_s3_files", return_value={} + ) + event = {} + context = {} + response = lambda_handler(event, context) + assert response["statusCode"] == 200 + assert ( + json.loads(response["body"]) + == "CSV files processed and uploaded successfully." + ) + mock_list_existing_s3_files.assert_called_once() + mock_process_and_upload_tables.assert_called_once_with(mock_db, {}) + mock_db.close.assert_called_once() + + def test_lambda_handler_no_changes_detected_no_files_uploaded(self, mocker): + mock_db = MagicMock() + mock_db.run.side_effect = [ + [["Fruits"]], + [["Vegetable", "Sour", "Green"], ["Berry", "Sweet", "Red"]], + [["Food_type"], ["Flavour"], ["Colour"]], + ] + mock_db.columns.return_value = [ + {"name": "Food_type"}, + {"name": "Flavour"}, + {"name": "Colour"}, + ] + + with patch("src.extract_lambda.connect_to_database", return_value=mock_db): + mock_process_and_upload_tables = mocker.patch( + "src.extract_lambda.process_and_upload_tables", return_value=False + ) + mock_list_existing_s3_files = mocker.patch( + "src.extract_lambda.list_existing_s3_files", return_value={} + ) + event = {} + context = {} + response = lambda_handler(event, context) + assert response["statusCode"] == 200 + assert ( + json.loads(response["body"]) + == "No changes detected, no CSV files were uploaded." + ) + mock_list_existing_s3_files.assert_called_once() + mock_process_and_upload_tables.assert_called_once_with(mock_db, {}) + mock_db.close.assert_called_once() + + def test_lambda_handler_exception_error(self, mocker): + with patch( + "src.extract_lambda.connect_to_database", + side_effect=Exception("Database connection error"), + ): + mock_process_and_upload_tables = mocker.patch( + "src.extract_lambda.process_and_upload_tables" + ) + mock_list_existing_s3_files = mocker.patch( + "src.extract_lambda.list_existing_s3_files" + ) + event = {} + context = {} + response = lambda_handler(event, context) + assert response["statusCode"] == 500 + assert json.loads(response["body"]) == "Internal server error." + mock_list_existing_s3_files.assert_not_called() + mock_process_and_upload_tables.assert_not_called() + class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): - logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) list_existing_s3_files(client=s3_client) - assert 'Error listing S3 objects' in caplog.text + assert "Error listing S3 objects" in caplog.text def test_error_if_bucket_is_empty(self, s3_client, caplog): - - s3_client.create_bucket(Bucket='extract_bucket', - CreateBucketConfiguration={ - 'LocationConstraint': 'eu-west-2' - }) + s3_client.create_bucket( + Bucket="extract_bucket", + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) list_existing_s3_files(client=s3_client) - assert 'The bucket is empty' in caplog.text + assert "The bucket is empty" in caplog.text def test_error_retrieving_object(self, s3_client, caplog): - s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt') - list_existing_s3_files(bucket_name='test_bucket', client=s3_client) + s3_client.upload_file("tests/dummy.txt", "extract_bucket", "dummy.txt") + list_existing_s3_files(bucket_name="test_bucket", client=s3_client) - assert 'Error retrieving S3 object ' in caplog.text + assert "Error retrieving S3 object " in caplog.text def test_retrieves_file_content(self, s3_client, caplog): result = list_existing_s3_files(client=s3_client) - assert list(result.values()) == ['This is a test file.'] + assert list(result.values()) == ["This is a test file."] + class TestConnectToDatabase: def test_connect_to_database(mock_conn, mock_config): - with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: + with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: connect_to_database() mock_conn.assert_called_with( - host="abc", user="def", port="5432", password="password", database="db" + host="abc", user="def", port="5432", password="password", database="db" ) def test_database_error(self, mock_config): @@ -76,12 +169,14 @@ class TestConnectToDatabase: def test_logs_interface_error(self, caplog): logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) with pytest.raises(DBConnectionException): connect_to_database() - assert 'Interface error' in caplog.text -''' + assert "Interface error" in caplog.text + + +""" class TestProcessAndUploadTables: def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog): logger = logging.getLogger() @@ -106,4 +201,4 @@ class TestProcessAndUploadTables: s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key) process_and_upload_tables(mock_db(), existing_files, client=s3_client) assert 'No new data.' in caplog.text -''' \ No newline at end of file +""" -- cgit v1.2.3 From 24a4573d6cf64ec0383ae16bfba09a0ffdb8c129 Mon Sep 17 00:00:00 2001 From: T-Aji Date: Mon, 19 Aug 2024 11:49:08 +0100 Subject: update .gitignore --- .gitignore | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index bceab93..6aa03fc 100644 --- a/.gitignore +++ b/.gitignore @@ -14,5 +14,4 @@ __pycache__/ # OS-Related Files .DS_Store - -*venv* +venv \ No newline at end of file -- cgit v1.2.3 From 444bb270fc8f758f33b0477c992b6a8e873bcd89 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 11:06:02 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in 0eff70f according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/59 --- tests/test_extract_lambda.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index fc68a4a..7707cbf 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -42,6 +42,7 @@ def s3_client(aws_credentials): with mock_aws(): yield boto3.client("s3") + class TestLambdaHandler: def test_lambda_handler_files_processed_and_uploaded_successfully(self, mocker): mock_db = MagicMock() @@ -125,6 +126,7 @@ class TestLambdaHandler: mock_list_existing_s3_files.assert_not_called() mock_process_and_upload_tables.assert_not_called() + class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): logger = logging.getLogger() -- cgit v1.2.3 From 81cba7c5bc4bed060901d6e19c84d5acee054b3e Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 14:53:43 +0100 Subject: feat: create shell script for creating lambda layer zip --- scripts/make_layer_zip.sh | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100755 scripts/make_layer_zip.sh diff --git a/scripts/make_layer_zip.sh b/scripts/make_layer_zip.sh new file mode 100755 index 0000000..0e7560f --- /dev/null +++ b/scripts/make_layer_zip.sh @@ -0,0 +1,7 @@ +# Description: Make the zip file for the layer + +cd "$(dirname "$0")/.." +mkdir tmp_python +pip3 install --upgrade -r requirements.txt -t tmp_python/ +zip -r layer.zip tmp_python +rm -r tmp_python/ -- cgit v1.2.3 From 57d1e1ee5a13269f1bef6c3b754cb8374a657202 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 14:55:39 +0100 Subject: style: remove redundant comment --- terraform/lambda.tf | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index e33bc79..714ffa5 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -99,13 +99,9 @@ locals { resource "null_resource" "prepare_layer" { provisioner "local-exec" { command = < Date: Mon, 19 Aug 2024 15:02:39 +0100 Subject: infra(tf): modify variables & remove past zip creation --- terraform/lambda.tf | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 714ffa5..986170f 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -90,17 +90,16 @@ resource "aws_lambda_function" "load_lambda" { # Lambda Layer Specification locals { - layer_dir = "lambda_layer" + layer_dir = "../" requirements = "requirements.txt" layer_zip = "layer.zip" layer_name = "lambda_layer_dev" + script_dir = "../scripts" } resource "null_resource" "prepare_layer" { provisioner "local-exec" { - command = < Date: Mon, 19 Aug 2024 15:03:50 +0100 Subject: wip: amend extract_lambda test --- tests/test_extract_lambda.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index 7707cbf..4a5157b 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -12,6 +12,7 @@ from src.extract_lambda import ( DBConnectionException, lambda_handler, process_and_upload_tables, + retrieve_secrets ) @@ -24,7 +25,7 @@ def mock_config(): "password": "password", "database": "db", } - with patch("src.extract_lambda.get_config", return_value=env_vars) as mock_config: + with patch("src.extract_lambda.retrieve_secrets", return_value=env_vars) as mock_config: yield mock_config @@ -140,7 +141,7 @@ class TestListExistingS3Files: Bucket="extract_bucket", CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, ) - list_existing_s3_files(client=s3_client) + list_existing_s3_files("extract_bucket", client=s3_client) assert "The bucket is empty" in caplog.text def test_error_retrieving_object(self, s3_client, caplog): @@ -176,9 +177,8 @@ class TestConnectToDatabase: assert "Interface error" in caplog.text -""" class TestProcessAndUploadTables: - def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog): + def test_error_process_and_upload_tables(mock_conn, s3_client, caplog): logger = logging.getLogger() logger.info('Testing now.') caplog.set_level(logging.ERROR) @@ -188,17 +188,17 @@ class TestProcessAndUploadTables: "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits'"] return_values = [[['Fruits']], [['Vegetable','Sour','Green'],['Berry','Sweet','Red']], - [['Food_type'],['Flavour'],['Colour']]] + [['Food_type'],['Flavour'],['Colour']]] # why are individual column names in lists vals = dict(zip(queries,return_values)) + # {"SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';": [['Fruits']], 'SELECT * FROM Fruits;': [['Vegetable', 'Sour', 'Green'], ['Berry', 'Sweet', 'Red']], "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits'": [['Food_type'], ['Flavour'], ['Colour']]} - #### - with patch('src.extract_lambda.connect_to_database') as mock_db: - mock_db().run.side_effects = return_values + with patch('src.extract_lambda.Connection') as mock_db: + mock_db().run.side_effect = return_values s3_key = 'Fruits/2024/08/15/Fruits_16:46:30.csv' existing_files = {s3_key: 'Food_type,Flavour,Colour\nFruit,Sour,Green\nBerry,Sweet,Red'} - s3_client.create_bucket(Bucket='extract_bucket', + s3_client.create_bucket(Bucket='test_extract_bucket', CreateBucketConfiguration={'LocationConstraint': 'eu-west-2'}) + print(s3_client.list_buckets) s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key) process_and_upload_tables(mock_db(), existing_files, client=s3_client) - assert 'No new data.' in caplog.text -""" + assert 'No new data.' in caplog.text \ No newline at end of file -- cgit v1.2.3 From 7b46fec037830648f6f356219f9df7fdbbbd181c Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 15:06:35 +0100 Subject: infra(tf): remove lambda layer dev reference --- terraform/s3.tf | 5 ----- 1 file changed, 5 deletions(-) diff --git a/terraform/s3.tf b/terraform/s3.tf index b3a863c..d5cdee3 100644 --- a/terraform/s3.tf +++ b/terraform/s3.tf @@ -12,8 +12,3 @@ resource "aws_s3_bucket" "transform_bucket" { resource "aws_s3_bucket" "lambda_code_bucket" { bucket_prefix = "${var.s3_code_bucket_name}-" } - -### LAMBDA LAYER BUCKET -resource "aws_s3_bucket" "lambda_layer_bucket" { - bucket_prefix = "lambda-layer-dev-" -} \ No newline at end of file -- cgit v1.2.3 From 284a52df866c34d925b85ccd4f06d6141e67ce70 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 15:12:56 +0100 Subject: fix(tf): correct layer.zip output path --- terraform/lambda.tf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 986170f..8a4207d 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -93,7 +93,7 @@ locals { layer_dir = "../" requirements = "requirements.txt" layer_zip = "layer.zip" - layer_name = "lambda_layer_dev" + layer_name = "lambda_layer" script_dir = "../scripts" } @@ -105,7 +105,7 @@ resource "null_resource" "prepare_layer" { resource "aws_s3_object" "lambda_layer_zip" { bucket = aws_s3_bucket.lambda_code_bucket.id #bucket instead of id - key = "lambda_layer/${local.layer_name}/${local.layer_zip}" + key = "${local.layer_name}/${local.layer_zip}" source = "${local.layer_dir}/${local.layer_zip}" depends_on = [null_resource.prepare_layer] } @@ -113,7 +113,7 @@ resource "aws_s3_object" "lambda_layer_zip" { resource "aws_lambda_layer_version" "lambda_layer" { layer_name = local.layer_name compatible_runtimes = ["python3.11"] - s3_bucket = aws_s3_bucket.lambda_layer_bucket.id #bucket instead of id + s3_bucket = aws_s3_bucket.lambda_bucket.bucket s3_key = aws_s3_object.lambda_layer_zip.key skip_destroy = true depends_on = [aws_s3_object.lambda_layer_zip] -- cgit v1.2.3 From cbf1d083dc0bf4d78da83cb169da49731f8ace65 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 15:18:22 +0100 Subject: fix(tf): correct s3_bucket value for lambda_layer --- terraform/lambda.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 8a4207d..bf96747 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -113,7 +113,7 @@ resource "aws_s3_object" "lambda_layer_zip" { resource "aws_lambda_layer_version" "lambda_layer" { layer_name = local.layer_name compatible_runtimes = ["python3.11"] - s3_bucket = aws_s3_bucket.lambda_bucket.bucket + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket s3_key = aws_s3_object.lambda_layer_zip.key skip_destroy = true depends_on = [aws_s3_object.lambda_layer_zip] -- cgit v1.2.3 From 024de7d7947f46cf6c0c829dc29eb8298e029576 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 15:37:54 +0100 Subject: fix(make_layer_zip): change folder structure of layer.zip --- scripts/make_layer_zip.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/make_layer_zip.sh b/scripts/make_layer_zip.sh index 0e7560f..eabe301 100755 --- a/scripts/make_layer_zip.sh +++ b/scripts/make_layer_zip.sh @@ -1,7 +1,8 @@ # Description: Make the zip file for the layer cd "$(dirname "$0")/.." -mkdir tmp_python -pip3 install --upgrade -r requirements.txt -t tmp_python/ -zip -r layer.zip tmp_python -rm -r tmp_python/ +mkdir -p python/lib/python3.11/site-packages +pip3 install --upgrade -r requirements.txt -t python/lib/python3.11/site-packages +rm layer.zip +zip -r layer.zip python +rm -r python/ -- cgit v1.2.3 From 4b3b80a2f2177456ed6c2857a7ae0987d7304360 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 15:40:01 +0100 Subject: chore(tf): remove unused requirements variable --- terraform/lambda.tf | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index bf96747..72aae04 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -90,11 +90,10 @@ resource "aws_lambda_function" "load_lambda" { # Lambda Layer Specification locals { - layer_dir = "../" - requirements = "requirements.txt" - layer_zip = "layer.zip" - layer_name = "lambda_layer" - script_dir = "../scripts" + layer_dir = "../" + layer_zip = "layer.zip" + layer_name = "lambda_layer" + script_dir = "../scripts" } resource "null_resource" "prepare_layer" { -- cgit v1.2.3 From b9f3576771c8af8933d23e95f7863f63e2bbc6aa Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Mon, 19 Aug 2024 15:43:28 +0100 Subject: wip: fixed broken tests; hashed out test_error_retrieving_object --- src/extract_lambda.py | 1 + tests/test_extract_lambda.py | 49 ++++++++++++++++++++++++++------------------ 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 4168e27..217efdb 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -90,6 +90,7 @@ def extract_bucket(client=boto3.client("s3")): extract_bucket_filter = [ bucket["Name"] for bucket in response["Buckets"] if "extract" in bucket["Name"] ] + return extract_bucket_filter[0] diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index e94a8a4..665e419 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -1,11 +1,13 @@ +import boto3.exceptions +import botocore.exceptions import pytest import boto3 from moto import mock_aws from unittest.mock import patch, MagicMock from unittest import TestCase -from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables -import os +from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables, extract_bucket import logging +import os @pytest.fixture(scope='class') def mock_config(): @@ -16,7 +18,7 @@ def mock_config(): "password": "password", "database": "db", } - with patch("src.extract_lambda.get_config", return_value=env_vars) as mock_config: + with patch("src.extract_lambda.retrieve_secrets", return_value=env_vars) as mock_config: yield mock_config @@ -24,7 +26,7 @@ def mock_config(): def aws_credentials(): os.environ["AWS_ACCESS_KEY_ID"] = 'testing' os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing' - os.environ["AWS_SECURIT_TOKEN"] = 'testing' + os.environ["AWS_SECURITY_TOKEN"] = 'testing' os.environ["AWS_SESSION_TOKEN"] = 'testing' os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2' @@ -33,6 +35,14 @@ def s3_client(aws_credentials): with mock_aws(): yield boto3.client('s3') +@pytest.fixture(scope='class') +def s3_mock_bucket(s3_client): + bucket = s3_client.create_bucket(Bucket='extract_bucket', + CreateBucketConfiguration={ + 'LocationConstraint': 'eu-west-2' + }) + return bucket + class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): @@ -42,35 +52,34 @@ class TestListExistingS3Files: list_existing_s3_files(client=s3_client) assert 'Error listing S3 objects' in caplog.text - def test_error_if_bucket_is_empty(self, s3_client, caplog): + def test_error_if_bucket_is_empty(self, s3_client, caplog, s3_mock_bucket): + list_existing_s3_files('extract_bucket', client=s3_client) + assert 'The bucket is empty' in caplog.text - s3_client.create_bucket(Bucket='extract_bucket', - CreateBucketConfiguration={ - 'LocationConstraint': 'eu-west-2' - }) - list_existing_s3_files(client=s3_client) - assert 'The bucket is empty' in caplog.text - def test_error_retrieving_object(self, s3_client, caplog): - s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt') - list_existing_s3_files(bucket_name='test_bucket', client=s3_client) + # def test_error_retrieving_object(self, s3_client, caplog, s3_mock_bucket): + # s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt') - assert 'Error retrieving S3 object ' in caplog.text + # list_existing_s3_files(bucket_name='extract_bucket', client=s3_client) - def test_retrieves_file_content(self, s3_client, caplog): - result = list_existing_s3_files(client=s3_client) + # assert 'Error retrieving S3 object dummy.txt: ClientError' in caplog.text + + + def test_retrieves_file_content(self, s3_client, caplog, s3_mock_bucket): + s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt') + result = list_existing_s3_files('extract_bucket', client=s3_client) - assert list(result.values()) == ['This is a test file.'] + assert list(result.values()) == ['This is a test file.'] class TestConnectToDatabase: - def test_connect_to_database(mock_conn, mock_config): + def test_connect_to_database(mock_conn, mock_config): ##had mock_config in param with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: connect_to_database() mock_conn.assert_called_with( host="abc", user="def", port="5432", password="password", database="db" ) - def test_database_error(self, mock_config): + def test_database_error(self, mock_config): ##had mock_config in param with pytest.raises(DBConnectionException): connect_to_database() -- cgit v1.2.3 From c3c45c0d133ce32d48f1c72a0ac54f291038b1e7 Mon Sep 17 00:00:00 2001 From: Ellie Date: Mon, 19 Aug 2024 15:56:48 +0100 Subject: wip: fixing last test --- src/extract_lambda.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 4168e27..533bf82 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -147,12 +147,13 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): WHERE table_schema='public' AND table_type='BASE TABLE';""" ) for table in tables: + print(tables) table_name = table[0] rows = db.run( f"SELECT * FROM {identifier(table_name)} " "WHERE last_updated >= :latest;", latest={datetime.strftime(latest_timestamp, "%H-%m-%d %H:%M:%S")}, ) - + print('rows', rows) # Creating a temporary file path and writing the column name to it followed by each row of data if rows: csv_file_path = f"/tmp/{table_name}.csv" @@ -183,6 +184,6 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): else: load_status["no change"].append(table_name) logger.info( - f"No new data in {table_name} name. Latest data retrieved is from {latest_timestamp}." + f"No new data" ) return load_status -- cgit v1.2.3 From e4b66476a174edb68992b00b37bef2d0e0be3969 Mon Sep 17 00:00:00 2001 From: Ellie Date: Mon, 19 Aug 2024 15:57:14 +0100 Subject: wip: fixing last test --- tests/test_extract_lambda.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index 4a5157b..01d7add 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -181,24 +181,27 @@ class TestProcessAndUploadTables: def test_error_process_and_upload_tables(mock_conn, s3_client, caplog): logger = logging.getLogger() logger.info('Testing now.') - caplog.set_level(logging.ERROR) + caplog.set_level(logging.INFO) #### - queries = ["SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';", - "SELECT * FROM Fruits;", - "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits'"] + queries = [ + "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';", + "SELECT * FROM Fruits;", + "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits'" + ] return_values = [[['Fruits']], - [['Vegetable','Sour','Green'],['Berry','Sweet','Red']], - [['Food_type'],['Flavour'],['Colour']]] # why are individual column names in lists + [['Vegetable','Sour','Green','2022-11-03 14:20:49.962'],['Berry','Sweet','Red','2022-11-03 14:20:49.962']], + [['Food_type'],['Flavour'],['Colour'],['last_updated']]] # why are individual column names in lists vals = dict(zip(queries,return_values)) # {"SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';": [['Fruits']], 'SELECT * FROM Fruits;': [['Vegetable', 'Sour', 'Green'], ['Berry', 'Sweet', 'Red']], "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits'": [['Food_type'], ['Flavour'], ['Colour']]} with patch('src.extract_lambda.Connection') as mock_db: mock_db().run.side_effect = return_values s3_key = 'Fruits/2024/08/15/Fruits_16:46:30.csv' - existing_files = {s3_key: 'Food_type,Flavour,Colour\nFruit,Sour,Green\nBerry,Sweet,Red'} + existing_files = {s3_key: 'Food_type,Flavour,Colour,last_updated\nVegetable,Sour,Green,2022-11-03 14:20:49.962\nBerry,Sweet,Red, 2022-11-03 14:20:49.962'} s3_client.create_bucket(Bucket='test_extract_bucket', CreateBucketConfiguration={'LocationConstraint': 'eu-west-2'}) - print(s3_client.list_buckets) - s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key) + s3_client.upload_file('tests/dummy_identical.csv', 'test_extract_bucket', s3_key) process_and_upload_tables(mock_db(), existing_files, client=s3_client) - assert 'No new data.' in caplog.text \ No newline at end of file + print('logger', logger.info('hello')) + print('our test', caplog.text) + assert 'No new data' in caplog.text \ No newline at end of file -- cgit v1.2.3 From ec3523a20d5ece3ce1d7b59072f5948f4fa40810 Mon Sep 17 00:00:00 2001 From: Ellie Date: Mon, 19 Aug 2024 15:57:40 +0100 Subject: amend dummy_identical --- tests/dummy_identical.csv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/dummy_identical.csv b/tests/dummy_identical.csv index fdd8993..e44e9fc 100644 --- a/tests/dummy_identical.csv +++ b/tests/dummy_identical.csv @@ -1,4 +1,4 @@ -Food_type,Flavour,Colour -Vegetable,Sour,Green -Berry,Sweet,Red +Food_type,Flavour,Colour,last_updated +Vegetable,Sour,Green,2022-11-03 14:20:49.962 +Berry,Sweet,Red,2022-11-03 14:20:49.962 -- cgit v1.2.3 From 333822a70640712ac57036d37f7d8ac0787e9cc0 Mon Sep 17 00:00:00 2001 From: HastarTara Date: Mon, 19 Aug 2024 16:19:16 +0100 Subject: bugfixing --- tests/test_extract_lambda.py | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index 01d7add..a4e8f2b 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -179,29 +179,36 @@ class TestConnectToDatabase: class TestProcessAndUploadTables: def test_error_process_and_upload_tables(mock_conn, s3_client, caplog): - logger = logging.getLogger() - logger.info('Testing now.') caplog.set_level(logging.INFO) - #### + + # Mock return values for database queries queries = [ "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';", - "SELECT * FROM Fruits;", - "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits'" - ] - return_values = [[['Fruits']], - [['Vegetable','Sour','Green','2022-11-03 14:20:49.962'],['Berry','Sweet','Red','2022-11-03 14:20:49.962']], - [['Food_type'],['Flavour'],['Colour'],['last_updated']]] # why are individual column names in lists - vals = dict(zip(queries,return_values)) - # {"SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';": [['Fruits']], 'SELECT * FROM Fruits;': [['Vegetable', 'Sour', 'Green'], ['Berry', 'Sweet', 'Red']], "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits'": [['Food_type'], ['Flavour'], ['Colour']]} + "SELECT * FROM Fruits WHERE last_updated > :latest;", + "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits';" + ] + return_values = [ + [['Fruits']], + [], # No new rows with a more recent last_updated timestamp + [['Food_type'], ['Flavour'], ['Colour'], ['last_updated']] + ] + vals = dict(zip(queries, return_values)) + # Patch the database connection and set return values for queries with patch('src.extract_lambda.Connection') as mock_db: mock_db().run.side_effect = return_values s3_key = 'Fruits/2024/08/15/Fruits_16:46:30.csv' - existing_files = {s3_key: 'Food_type,Flavour,Colour,last_updated\nVegetable,Sour,Green,2022-11-03 14:20:49.962\nBerry,Sweet,Red, 2022-11-03 14:20:49.962'} + existing_files = { + s3_key: 'Food_type,Flavour,Colour,last_updated\nVegetable,Sour,Green,2022-11-03 14:20:49.962\nBerry,Sweet,Red,2022-11-03 14:20:49.962' + } + + # Simulate S3 bucket and file setup s3_client.create_bucket(Bucket='test_extract_bucket', - CreateBucketConfiguration={'LocationConstraint': 'eu-west-2'}) + CreateBucketConfiguration={'LocationConstraint': 'eu-west-2'}) s3_client.upload_file('tests/dummy_identical.csv', 'test_extract_bucket', s3_key) + + # Run the process_and_upload_tables function process_and_upload_tables(mock_db(), existing_files, client=s3_client) - print('logger', logger.info('hello')) - print('our test', caplog.text) - assert 'No new data' in caplog.text \ No newline at end of file + + # Assert that the log contains "No new data" + assert 'No new data' in caplog.text -- cgit v1.2.3 From 8b4e78b781617f68554efebcda75d982a382f650 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 16:31:50 +0100 Subject: fix(tf): fix permissions for bucket/object access --- terraform/iam.tf | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/terraform/iam.tf b/terraform/iam.tf index 0e5fa6d..7585ff8 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -28,17 +28,19 @@ resource "aws_iam_role" "multi_service_role" { ######################################################################## # S3 SETUP # Description: allows allows retention/tagging/access control settings -# Lambda IAM Policy for S3 Write +# Lambda IAM Policy for S3 ######################################################################## # S3 DEFINE POLICY data "aws_iam_policy_document" "s3_data_policy_doc" { statement { + effect = "Allow" actions = [ "s3:PutObject", "s3:PutObjectRetention", "s3:PutObjectTagging", - "s3:PutObjectAcl" + "s3:PutObjectAcl", + "s3:ListObjects" ] resources = [ "${aws_s3_bucket.extract_bucket.arn}/*", @@ -46,6 +48,17 @@ data "aws_iam_policy_document" "s3_data_policy_doc" { "${aws_s3_bucket.lambda_code_bucket.arn}/*", ] } + + statement { + effect = "Allow" + actions = [ + "s3:ListBuckets", + "s3:ListAllMyBuckets" + ] + resources = [ + "arn:aws:s3:::*", + ] + } } -- cgit v1.2.3 From 982b8fa318c9065bd9037d14c56abcd126252978 Mon Sep 17 00:00:00 2001 From: Ellie Date: Mon, 19 Aug 2024 16:33:26 +0100 Subject: add working process and upload tables test --- src/extract_lambda.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 533bf82..5a5a631 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -150,8 +150,8 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): print(tables) table_name = table[0] rows = db.run( - f"SELECT * FROM {identifier(table_name)} " "WHERE last_updated >= :latest;", - latest={datetime.strftime(latest_timestamp, "%H-%m-%d %H:%M:%S")}, + f"SELECT * FROM {identifier(table_name)} WHERE last_updated >= :latest;", + latest={datetime.strftime(latest_timestamp, "%Y-%m-%d %H:%M:%S")}, ) print('rows', rows) # Creating a temporary file path and writing the column name to it followed by each row of data -- cgit v1.2.3 From 4f629e532a1e989096985dc9cd9e6f03f7b44354 Mon Sep 17 00:00:00 2001 From: Ellie Date: Mon, 19 Aug 2024 16:33:46 +0100 Subject: add working process and upload tables test --- tests/test_extract_lambda.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index a4e8f2b..3405743 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -209,6 +209,7 @@ class TestProcessAndUploadTables: # Run the process_and_upload_tables function process_and_upload_tables(mock_db(), existing_files, client=s3_client) - # Assert that the log contains "No new data" assert 'No new data' in caplog.text + + # process and upload tables needs more tests \ No newline at end of file -- cgit v1.2.3 From 3e35364cc425db8738fb247a18f91c052c49fa8f Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 16:36:39 +0100 Subject: chore: remove redundant test folder --- test/test_secrets_manager.py | 39 --------------------------------------- 1 file changed, 39 deletions(-) delete mode 100644 test/test_secrets_manager.py diff --git a/test/test_secrets_manager.py b/test/test_secrets_manager.py deleted file mode 100644 index cb4ec15..0000000 --- a/test/test_secrets_manager.py +++ /dev/null @@ -1,39 +0,0 @@ -from src.secrets_manager import sm_client, create_secret, list_secret -import boto3 -from moto import mock_aws -import json -import pytest -import os - -pytest.fixture(scope="class") - - -def mock_aws_credentials(): - """Mocked AWS Credentials for moto.""" - os.environ["AWS_ACCESS_KEY_ID"] = "testing" - os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" - os.environ["AWS_SECURITY_TOKEN"] = "testing" - os.environ["AWS_SESSION_TOKEN"] = "testing" - os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" - - -@pytest.fixture(scope="class") -def mock_sm_client(mock_aws_credentials): - with mock_aws(): - yield boto3.client("secretsmanager") - - -def test_create_secret_stores_secrets(mock_sm_client): - cohort_id = "test_cohort_id" - user = "test_user_id" - password = "test_password" - host = "test_host" - database = "test_database" - port = "test_port" - - secret_name = "test_secret" - response = create_secret( - mock_sm_client, secret_name, cohort_id, user, password, host, database, port - ) - - assert response["Name"] == secret_name -- cgit v1.2.3 From 91d2e615a6af595898de2e329299c9cf42fc74f7 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 16:00:10 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in b9f3576 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/64 --- tests/test_extract_lambda.py | 81 +++++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 34 deletions(-) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index 665e419..02e3d3c 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -5,11 +5,18 @@ import boto3 from moto import mock_aws from unittest.mock import patch, MagicMock from unittest import TestCase -from src.extract_lambda import list_existing_s3_files, connect_to_database, DBConnectionException, process_and_upload_tables, extract_bucket +from src.extract_lambda import ( + list_existing_s3_files, + connect_to_database, + DBConnectionException, + process_and_upload_tables, + extract_bucket, +) import logging import os -@pytest.fixture(scope='class') + +@pytest.fixture(scope="class") def mock_config(): env_vars = { "host": "abc", @@ -18,44 +25,47 @@ def mock_config(): "password": "password", "database": "db", } - with patch("src.extract_lambda.retrieve_secrets", return_value=env_vars) as mock_config: + with patch( + "src.extract_lambda.retrieve_secrets", return_value=env_vars + ) as mock_config: yield mock_config -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def aws_credentials(): - os.environ["AWS_ACCESS_KEY_ID"] = 'testing' - os.environ["AWS_SECRET_ACCESS_KEY"] = 'testing' - os.environ["AWS_SECURITY_TOKEN"] = 'testing' - os.environ["AWS_SESSION_TOKEN"] = 'testing' - os.environ["AWS_DEFAULT_REGION"]= 'eu-west-2' + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURITY_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "eu-west-2" + -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def s3_client(aws_credentials): with mock_aws(): - yield boto3.client('s3') + yield boto3.client("s3") + -@pytest.fixture(scope='class') +@pytest.fixture(scope="class") def s3_mock_bucket(s3_client): - bucket = s3_client.create_bucket(Bucket='extract_bucket', - CreateBucketConfiguration={ - 'LocationConstraint': 'eu-west-2' - }) + bucket = s3_client.create_bucket( + Bucket="extract_bucket", + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) return bucket + class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): - logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) list_existing_s3_files(client=s3_client) - assert 'Error listing S3 objects' in caplog.text + assert "Error listing S3 objects" in caplog.text def test_error_if_bucket_is_empty(self, s3_client, caplog, s3_mock_bucket): - list_existing_s3_files('extract_bucket', client=s3_client) - assert 'The bucket is empty' in caplog.text - + list_existing_s3_files("extract_bucket", client=s3_client) + assert "The bucket is empty" in caplog.text # def test_error_retrieving_object(self, s3_client, caplog, s3_mock_bucket): # s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt') @@ -64,33 +74,36 @@ class TestListExistingS3Files: # assert 'Error retrieving S3 object dummy.txt: ClientError' in caplog.text - def test_retrieves_file_content(self, s3_client, caplog, s3_mock_bucket): - s3_client.upload_file('tests/dummy.txt', 'extract_bucket', 'dummy.txt') - result = list_existing_s3_files('extract_bucket', client=s3_client) + s3_client.upload_file("tests/dummy.txt", "extract_bucket", "dummy.txt") + result = list_existing_s3_files("extract_bucket", client=s3_client) + + assert list(result.values()) == ["This is a test file."] - assert list(result.values()) == ['This is a test file.'] class TestConnectToDatabase: - def test_connect_to_database(mock_conn, mock_config): ##had mock_config in param - with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: + # had mock_config in param + def test_connect_to_database(mock_conn, mock_config): + with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: connect_to_database() mock_conn.assert_called_with( - host="abc", user="def", port="5432", password="password", database="db" + host="abc", user="def", port="5432", password="password", database="db" ) - def test_database_error(self, mock_config): ##had mock_config in param + def test_database_error(self, mock_config): # had mock_config in param with pytest.raises(DBConnectionException): connect_to_database() def test_logs_interface_error(self, caplog): logger = logging.getLogger() - logger.info('Testing now.') + logger.info("Testing now.") caplog.set_level(logging.ERROR) with pytest.raises(DBConnectionException): connect_to_database() - assert 'Interface error' in caplog.text -''' + assert "Interface error" in caplog.text + + +""" class TestProcessAndUploadTables: def test_error_process_and_upload_tables(mock_conn, mock_config, s3_client, caplog): logger = logging.getLogger() @@ -115,4 +128,4 @@ class TestProcessAndUploadTables: s3_client.upload_file('tests/dummy_identical.csv', 'extract_bucket', s3_key) process_and_upload_tables(mock_db(), existing_files, client=s3_client) assert 'No new data.' in caplog.text -''' \ No newline at end of file +""" -- cgit v1.2.3 From b80ad74122609fca98597d9a04518df855b58aed Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 16:10:22 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in 4a23069 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/64 --- tests/test_extract_lambda.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index b1894cc..a43ae0a 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -55,6 +55,7 @@ def s3_mock_bucket(s3_client): ) return bucket + class TestLambdaHandler: def test_lambda_handler_files_processed_and_uploaded_successfully(self, mocker): mock_db = MagicMock() @@ -138,6 +139,7 @@ class TestLambdaHandler: mock_list_existing_s3_files.assert_not_called() mock_process_and_upload_tables.assert_not_called() + class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): logger = logging.getLogger() @@ -175,4 +177,4 @@ class TestConnectToDatabase: caplog.set_level(logging.ERROR) with pytest.raises(DBConnectionException): connect_to_database() - assert "Interface error" in caplog.text \ No newline at end of file + assert "Interface error" in caplog.text -- cgit v1.2.3 From a42d030fb663ad7eb040498cfc5f0627a27d6cc6 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 16:11:44 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in 4f629e5 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/65 --- src/extract_lambda.py | 8 +++----- tests/test_extract_lambda.py | 34 ++++++++++++++++++++-------------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 5a5a631..9b17ef2 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -151,9 +151,9 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): table_name = table[0] rows = db.run( f"SELECT * FROM {identifier(table_name)} WHERE last_updated >= :latest;", - latest={datetime.strftime(latest_timestamp, "%Y-%m-%d %H:%M:%S")}, + latest={datetime.strftime(latest_timestamp, "%Y-%m-%d %H:%M:%S")}, ) - print('rows', rows) + print("rows", rows) # Creating a temporary file path and writing the column name to it followed by each row of data if rows: csv_file_path = f"/tmp/{table_name}.csv" @@ -183,7 +183,5 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): logger.error(f"Error uploading to S3: {e}") else: load_status["no change"].append(table_name) - logger.info( - f"No new data" - ) + logger.info(f"No new data") return load_status diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index 3405743..5a1c5b2 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -12,7 +12,7 @@ from src.extract_lambda import ( DBConnectionException, lambda_handler, process_and_upload_tables, - retrieve_secrets + retrieve_secrets, ) @@ -25,7 +25,9 @@ def mock_config(): "password": "password", "database": "db", } - with patch("src.extract_lambda.retrieve_secrets", return_value=env_vars) as mock_config: + with patch( + "src.extract_lambda.retrieve_secrets", return_value=env_vars + ) as mock_config: yield mock_config @@ -185,31 +187,35 @@ class TestProcessAndUploadTables: queries = [ "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_type='BASE TABLE';", "SELECT * FROM Fruits WHERE last_updated > :latest;", - "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits';" + "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where table_name = 'Fruits';", ] return_values = [ - [['Fruits']], + [["Fruits"]], [], # No new rows with a more recent last_updated timestamp - [['Food_type'], ['Flavour'], ['Colour'], ['last_updated']] + [["Food_type"], ["Flavour"], ["Colour"], ["last_updated"]], ] vals = dict(zip(queries, return_values)) # Patch the database connection and set return values for queries - with patch('src.extract_lambda.Connection') as mock_db: + with patch("src.extract_lambda.Connection") as mock_db: mock_db().run.side_effect = return_values - s3_key = 'Fruits/2024/08/15/Fruits_16:46:30.csv' + s3_key = "Fruits/2024/08/15/Fruits_16:46:30.csv" existing_files = { - s3_key: 'Food_type,Flavour,Colour,last_updated\nVegetable,Sour,Green,2022-11-03 14:20:49.962\nBerry,Sweet,Red,2022-11-03 14:20:49.962' + s3_key: "Food_type,Flavour,Colour,last_updated\nVegetable,Sour,Green,2022-11-03 14:20:49.962\nBerry,Sweet,Red,2022-11-03 14:20:49.962" } # Simulate S3 bucket and file setup - s3_client.create_bucket(Bucket='test_extract_bucket', - CreateBucketConfiguration={'LocationConstraint': 'eu-west-2'}) - s3_client.upload_file('tests/dummy_identical.csv', 'test_extract_bucket', s3_key) - + s3_client.create_bucket( + Bucket="test_extract_bucket", + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) + s3_client.upload_file( + "tests/dummy_identical.csv", "test_extract_bucket", s3_key + ) + # Run the process_and_upload_tables function process_and_upload_tables(mock_db(), existing_files, client=s3_client) # Assert that the log contains "No new data" - assert 'No new data' in caplog.text + assert "No new data" in caplog.text - # process and upload tables needs more tests \ No newline at end of file + # process and upload tables needs more tests -- cgit v1.2.3 From b499d78dc660017694ec683c90aba3f558c00669 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 16:14:07 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in f014d1a according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/65 --- tests/test_extract_lambda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index 347ef22..3931cfc 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -180,6 +180,7 @@ class TestConnectToDatabase: connect_to_database() assert "Interface error" in caplog.text + class TestProcessAndUploadTables: def test_error_process_and_upload_tables(mock_conn, s3_client, caplog): caplog.set_level(logging.INFO) @@ -218,4 +219,3 @@ class TestProcessAndUploadTables: process_and_upload_tables(mock_db(), existing_files, client=s3_client) # Assert that the log contains "No new data" assert "No new data" in caplog.text - -- cgit v1.2.3 From e537bdef11d1d518d4df1c057f3624e3fe6da24d Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 19:05:41 +0100 Subject: infra(tf): remove rds.tf --- terraform/rds.tf | 70 -------------------------------------------------------- 1 file changed, 70 deletions(-) delete mode 100644 terraform/rds.tf diff --git a/terraform/rds.tf b/terraform/rds.tf deleted file mode 100644 index a013fb3..0000000 --- a/terraform/rds.tf +++ /dev/null @@ -1,70 +0,0 @@ -# data "aws_availability_zones" "available" {} - -# module "vpc" { -# source = "terraform-aws-modules/vpc/aws" -# version = "5.12.1" - -# name = var.project_name -# cidr = "10.0.0.0/16" -# azs = data.aws_availability_zones.available.names -# public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"] -# enable_dns_hostnames = true -# enable_dns_support = true -# } - -# resource "aws_db_subnet_group" "Terrific-Totes-sub-gr" { -# name = "tt-db-subnet" -# subnet_ids = module.vpc.public_subnets - -# tags = { -# Name = "${var.project_name}" -# } -# } - -# resource "aws_security_group" "rds" { -# name = "${var.project_name}-rds" -# vpc_id = module.vpc.vpc_id - -# ingress { -# from_port = 5432 -# to_port = 5432 -# protocol = "tcp" -# cidr_blocks = ["0.0.0.0/0"] -# } - -# egress { -# from_port = 5432 -# to_port = 5432 -# protocol = "tcp" -# cidr_blocks = ["0.0.0.0/0"] -# } - -# tags = { -# Name = "${var.project_name}-rds" -# } -# } - -# resource "aws_db_parameter_group" "Terrific-Totes-param-gr" { -# name = "tt-db-param" -# family = "postgres14" - -# parameter { -# name = "log_connections" -# value = "1" -# } -# } - -# resource "aws_db_instance" "terrific-totes-rds" { -# db_name = var.project_name -# instance_class = "db.t3.micro" -# allocated_storage = 5 -# engine = "postgres" -# engine_version = "14.10" -# username = "" -# password = "" -# db_subnet_group_name = aws_db_subnet_group.Terrific-Totes-sub-gr.name -# vpc_security_group_ids = [aws_security_group.rds.id] -# parameter_group_name = aws_db_parameter_group.Terrific-Totes-param-gr.name -# publicly_accessible = false -# skip_final_snapshot = true -# } -- cgit v1.2.3 From 56b2c376a925132f3bf2c7e6cad4911400955129 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 19:07:30 +0100 Subject: infra(tf): enforce version constraint on terraform --- terraform/main.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/terraform/main.tf b/terraform/main.tf index 310a251..206fc74 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -1,4 +1,5 @@ terraform { + required_version = ">= 1.8.0" required_providers { aws = { source = "hashicorp/aws" -- cgit v1.2.3 From 35bf4e8668309cb28175ef0224a6bce453abb47f Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 19:19:44 +0100 Subject: chore(tf): replace static tag values in main.tf with variables --- terraform/main.tf | 8 ++++---- terraform/vars.tf | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/terraform/main.tf b/terraform/main.tf index 206fc74..5ccbec2 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -25,11 +25,11 @@ provider "aws" { region = "eu-west-2" default_tags { tags = { - ProjectName = "Terrific-Totes" - Team = "Team-Bentley" - Environment = "Dev" - GitHubRepo = "de-project-bentley" + ProjectName = var.project_name + Environment = var.environment ManagedBy = "Terraform" + GitHubRepo = var.github_repo + Team = var.team_name } } } diff --git a/terraform/vars.tf b/terraform/vars.tf index 3c88731..1adbcf7 100644 --- a/terraform/vars.tf +++ b/terraform/vars.tf @@ -33,6 +33,26 @@ variable "project_name" { default = "tt" } +variable "aws_region" { + type = string + default = "eu-west-2" +} + +variable "environment" { + type = string + default = "dev" +} + +variable "github_repo" { + type = string + default = "de-project-bentley" +} + +variable "team_name" { + type = string + default = "Team-Bentley" +} + data "aws_caller_identity" "current" {} data "aws_region" "current" {} -- cgit v1.2.3 From 95c4fe80aea75a9a63b1cfd85abadaab6b96b876 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 19:24:23 +0100 Subject: infra(tf): add state file encryption --- terraform/main.tf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/terraform/main.tf b/terraform/main.tf index 5ccbec2..33c760c 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -15,9 +15,10 @@ terraform { } } backend "s3" { - bucket = "bentley-project-secrets" - key = "bentley-project/terraform.tfstate" - region = "eu-west-2" + bucket = "bentley-project-secrets" + key = "bentley-project/terraform.tfstate" + region = "eu-west-2" + encrypt = true } } -- cgit v1.2.3 From 22e7de562e62495e547eeff187d86bf9524ae5ca Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 20:22:16 +0100 Subject: feat: create shell script for terraform destroy/apply --- scripts/deploy.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100755 scripts/deploy.sh diff --git a/scripts/deploy.sh b/scripts/deploy.sh new file mode 100755 index 0000000..0446184 --- /dev/null +++ b/scripts/deploy.sh @@ -0,0 +1,15 @@ +# Deploy Script +# Description: Deploy and destroy Terraform +# WARNING: This will most likely destroy any current infrastructure if protections +# are not in place. Be careful! + +echo "WARNING: This script will destroy any infrastructure for testing." +echo "It should not be used once a proper deployment has been setup." +echo "Would you like to continue?" + +select yn in "Yes" "No"; do + case $yn in + Yes ) cd ../terraform/; terraform destroy -auto-approve; terraform apply -auto-approve; terraform destroy -auto-approve; break;; + No ) exit;; + esac +done -- cgit v1.2.3 From 50302044c64e414ffe0435908146bc718bf6bed9 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 20:24:35 +0100 Subject: feat(deploy.sh): exit if any command returns non-zero status --- scripts/deploy.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/deploy.sh b/scripts/deploy.sh index 0446184..16a9e13 100755 --- a/scripts/deploy.sh +++ b/scripts/deploy.sh @@ -3,6 +3,9 @@ # WARNING: This will most likely destroy any current infrastructure if protections # are not in place. Be careful! +# Exit if any command has a non-zero status +set -e + echo "WARNING: This script will destroy any infrastructure for testing." echo "It should not be used once a proper deployment has been setup." echo "Would you like to continue?" -- cgit v1.2.3 From 18f7ea0e4254890cd810ff2ee257306d94467faf Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 20:49:28 +0100 Subject: refactor: Improve deploy script user interaction --- scripts/deploy.sh | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/scripts/deploy.sh b/scripts/deploy.sh index 16a9e13..d7d18ff 100755 --- a/scripts/deploy.sh +++ b/scripts/deploy.sh @@ -11,8 +11,30 @@ echo "It should not be used once a proper deployment has been setup." echo "Would you like to continue?" select yn in "Yes" "No"; do - case $yn in - Yes ) cd ../terraform/; terraform destroy -auto-approve; terraform apply -auto-approve; terraform destroy -auto-approve; break;; - No ) exit;; - esac + case $yn in + Yes) + cd ../terraform/ + echo "Would you like to destroy the current infrastructure?" + select destroy_1 in "Yes" "No"; do + case $destroy_1 in + Yes) + terraform destroy + break + ;; + No) + echo "Skipping initial destroy..." + break + ;; + esac + done + + terraform apply -auto-approve + + break + ;; + No) + echo "Operation cancelled..." + exit + ;; + esac done -- cgit v1.2.3 From 68be61c22703d56a10e654702d15407231385b65 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 20:51:30 +0100 Subject: feat: ask user if they want to destroy new infrastructure --- scripts/deploy.sh | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/scripts/deploy.sh b/scripts/deploy.sh index d7d18ff..e56088e 100755 --- a/scripts/deploy.sh +++ b/scripts/deploy.sh @@ -28,7 +28,21 @@ select yn in "Yes" "No"; do esac done - terraform apply -auto-approve + terraform apply + + echo "Would you like to destroy the newly-created infrastructure?" + select destroy_2 in "Yes" "No"; do + case $destroy_2 in + Yes) + terraform destroy + break + ;; + No) + echo "Skipping final destroy... Infrastructure will remain." + break + ;; + esac + done break ;; -- cgit v1.2.3 From 57e855a797f225cd77401e85a671cde95e07ee70 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 20:55:19 +0100 Subject: style(tf): improve legibility of lambda.tf sections --- terraform/lambda.tf | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 72aae04..b7b362b 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -1,4 +1,7 @@ -# Extract Lambda Function +########################### +# Extract Lambda Function # +########################### + data "archive_file" "extract_lambda_zip" { type = "zip" source_file = "${path.module}/../src/extract_lambda.py" @@ -28,7 +31,10 @@ resource "aws_lambda_function" "extract_lambda" { depends_on = [aws_s3_object.extract_lambda_code] } -# Transform Lambda Function +############################# +# Transform Lambda Function # +############################# + data "archive_file" "transform_lambda_zip" { type = "zip" source_file = "${path.module}/../src/transform_lambda.py" @@ -58,7 +64,10 @@ resource "aws_lambda_function" "transform_lambda" { depends_on = [aws_s3_object.transform_lambda_code] } -# Load Lambda Function +######################## +# Load Lambda Function # +######################## + data "archive_file" "load_lambda_zip" { type = "zip" source_file = "${path.module}/../src/load_lambda.py" @@ -88,7 +97,10 @@ resource "aws_lambda_function" "load_lambda" { depends_on = [aws_s3_object.load_lambda_code] } -# Lambda Layer Specification +###################### +# Lambda Layer Setup # +###################### + locals { layer_dir = "../" layer_zip = "layer.zip" -- cgit v1.2.3 From d0e7b1304efe4ab6de2dc5bef1691b389a5bc449 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 20:58:46 +0100 Subject: refactor(tf): move sections in lambda.tf for better readability --- terraform/lambda.tf | 69 ++++++++++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index b7b362b..aa730c1 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -1,3 +1,40 @@ +#################### +# Common Variables # +#################### + +locals { + layer_dir = "../" + layer_zip = "layer.zip" + layer_name = "lambda_layer" + script_dir = "../scripts" +} + +###################### +# Lambda Layer Setup # +###################### + +resource "null_resource" "prepare_layer" { + provisioner "local-exec" { + command = "bash ${local.script_dir}/make_layer_zip.sh" + } +} + +resource "aws_s3_object" "lambda_layer_zip" { + bucket = aws_s3_bucket.lambda_code_bucket.id #bucket instead of id + key = "${local.layer_name}/${local.layer_zip}" + source = "${local.layer_dir}/${local.layer_zip}" + depends_on = [null_resource.prepare_layer] +} + +resource "aws_lambda_layer_version" "lambda_layer" { + layer_name = local.layer_name + compatible_runtimes = ["python3.11"] + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.lambda_layer_zip.key + skip_destroy = true + depends_on = [aws_s3_object.lambda_layer_zip] +} + ########################### # Extract Lambda Function # ########################### @@ -97,35 +134,3 @@ resource "aws_lambda_function" "load_lambda" { depends_on = [aws_s3_object.load_lambda_code] } -###################### -# Lambda Layer Setup # -###################### - -locals { - layer_dir = "../" - layer_zip = "layer.zip" - layer_name = "lambda_layer" - script_dir = "../scripts" -} - -resource "null_resource" "prepare_layer" { - provisioner "local-exec" { - command = "bash ${local.script_dir}/make_layer_zip.sh" - } -} - -resource "aws_s3_object" "lambda_layer_zip" { - bucket = aws_s3_bucket.lambda_code_bucket.id #bucket instead of id - key = "${local.layer_name}/${local.layer_zip}" - source = "${local.layer_dir}/${local.layer_zip}" - depends_on = [null_resource.prepare_layer] -} - -resource "aws_lambda_layer_version" "lambda_layer" { - layer_name = local.layer_name - compatible_runtimes = ["python3.11"] - s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket - s3_key = aws_s3_object.lambda_layer_zip.key - skip_destroy = true - depends_on = [aws_s3_object.lambda_layer_zip] -} -- cgit v1.2.3 From b75b7197f08e933cfcd4b69ad5182a01c2886d8e Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 21:05:33 +0100 Subject: refactor: change directory at start of the script to terraform folder --- scripts/deploy.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/deploy.sh b/scripts/deploy.sh index e56088e..f631bbc 100755 --- a/scripts/deploy.sh +++ b/scripts/deploy.sh @@ -6,6 +6,9 @@ # Exit if any command has a non-zero status set -e +# Change current directory to terraform folder at the start +cd ../terraform/ + echo "WARNING: This script will destroy any infrastructure for testing." echo "It should not be used once a proper deployment has been setup." echo "Would you like to continue?" @@ -13,7 +16,6 @@ echo "Would you like to continue?" select yn in "Yes" "No"; do case $yn in Yes) - cd ../terraform/ echo "Would you like to destroy the current infrastructure?" select destroy_1 in "Yes" "No"; do case $destroy_1 in -- cgit v1.2.3 From cfd6b462a874da77ada8facb3b2a3c0e85059fa4 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 21:07:10 +0100 Subject: infra(tf): only create layer zip if doesn't exist --- terraform/lambda.tf | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index aa730c1..b1e0d8e 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -3,10 +3,11 @@ #################### locals { - layer_dir = "../" - layer_zip = "layer.zip" - layer_name = "lambda_layer" - script_dir = "../scripts" + layer_dir = "../" + layer_zip = "layer.zip" + layer_name = "lambda_layer" + script_dir = "../scripts" + layer_zip_path = "${local.layer_dir}/${local.layer_zip}" } ###################### @@ -14,8 +15,13 @@ locals { ###################### resource "null_resource" "prepare_layer" { + + triggers = { + layer_zip_exists = fileexists(local.layer_zip_path) ? "exists" : "not_exists" + } + provisioner "local-exec" { - command = "bash ${local.script_dir}/make_layer_zip.sh" + command = "if [ ! -f ${local.layer_zip_path} ]; then bash ${local.script_dir}/make_layer_zip.sh; fi" } } -- cgit v1.2.3 From f035f60c7ece05b70275760238c5513b8f113310 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 21:09:26 +0100 Subject: docs(tf): add information about layer zip creation --- terraform/lambda.tf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index b1e0d8e..fc10431 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -16,6 +16,8 @@ locals { resource "null_resource" "prepare_layer" { + # New change: only run the script if the layer zip does not exist + triggers = { layer_zip_exists = fileexists(local.layer_zip_path) ? "exists" : "not_exists" } -- cgit v1.2.3 From 40c2952e628a92e63b3468be4d49f44a234cacce Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 21:13:03 +0100 Subject: infra(tf): add md5/source_code_hash checks for lambda layer --- terraform/lambda.tf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/terraform/lambda.tf b/terraform/lambda.tf index fc10431..f8e7515 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -32,6 +32,7 @@ resource "aws_s3_object" "lambda_layer_zip" { key = "${local.layer_name}/${local.layer_zip}" source = "${local.layer_dir}/${local.layer_zip}" depends_on = [null_resource.prepare_layer] + etag = fileexists(local.layer_zip_path) ? filemd5(local.layer_zip_path) : null } resource "aws_lambda_layer_version" "lambda_layer" { @@ -39,6 +40,7 @@ resource "aws_lambda_layer_version" "lambda_layer" { compatible_runtimes = ["python3.11"] s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket s3_key = aws_s3_object.lambda_layer_zip.key + source_code_hash = fileexists(local.layer_zip_path) ? filebase64sha256(local.layer_zip_path) : null skip_destroy = true depends_on = [aws_s3_object.lambda_layer_zip] } -- cgit v1.2.3 From e5715bc33d4470ceccb17c6853c3e52d4b1035d3 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 21:20:09 +0100 Subject: chore(tf): add tags to s3 buckets --- terraform/s3.tf | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/terraform/s3.tf b/terraform/s3.tf index d5cdee3..97910c8 100644 --- a/terraform/s3.tf +++ b/terraform/s3.tf @@ -1,14 +1,24 @@ ### EXTRACT BUCKET SET-UP resource "aws_s3_bucket" "extract_bucket" { bucket_prefix = "${var.s3_extract_bucket_name}-" + + tags = { + Name = "Ingestion Bucket" + } } ### TRANSFORM BUCKET SET-UP resource "aws_s3_bucket" "transform_bucket" { bucket_prefix = "${var.s3_transform_bucket_name}-" + tags = { + Name = "Transform Bucket" + } } ### LAMBDA BUCKET resource "aws_s3_bucket" "lambda_code_bucket" { bucket_prefix = "${var.s3_code_bucket_name}-" + tags = { + Name = "Load Bucket" + } } -- cgit v1.2.3 From 507b3071633fccc9aa1411880dd984ca346a141b Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 21:22:29 +0100 Subject: docs(tf): improve legibility of s3.tf sections --- terraform/s3.tf | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/terraform/s3.tf b/terraform/s3.tf index 97910c8..6ff58fd 100644 --- a/terraform/s3.tf +++ b/terraform/s3.tf @@ -1,4 +1,7 @@ -### EXTRACT BUCKET SET-UP +######################## +# EXTRACT BUCKET SETUP # +######################## + resource "aws_s3_bucket" "extract_bucket" { bucket_prefix = "${var.s3_extract_bucket_name}-" @@ -7,7 +10,10 @@ resource "aws_s3_bucket" "extract_bucket" { } } -### TRANSFORM BUCKET SET-UP +########################## +# TRANSFORM BUCKET SETUP # +########################## + resource "aws_s3_bucket" "transform_bucket" { bucket_prefix = "${var.s3_transform_bucket_name}-" tags = { @@ -15,7 +21,10 @@ resource "aws_s3_bucket" "transform_bucket" { } } -### LAMBDA BUCKET +####################### +# LAMBDA BUCKET SETUP # +####################### + resource "aws_s3_bucket" "lambda_code_bucket" { bucket_prefix = "${var.s3_code_bucket_name}-" tags = { -- cgit v1.2.3 From 1cb84bd663261c416a516b0dc59dbf8d62c4c1a7 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 21:23:07 +0100 Subject: docs(tf): correct lambda bucket name tag --- terraform/s3.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/s3.tf b/terraform/s3.tf index 6ff58fd..3e0f5b7 100644 --- a/terraform/s3.tf +++ b/terraform/s3.tf @@ -28,6 +28,6 @@ resource "aws_s3_bucket" "transform_bucket" { resource "aws_s3_bucket" "lambda_code_bucket" { bucket_prefix = "${var.s3_code_bucket_name}-" tags = { - Name = "Load Bucket" + Name = "Lambda Bucket" } } -- cgit v1.2.3 From 795c7c2917c2780e8ffdf0716cbedf3426dcbd5e Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 21:25:21 +0100 Subject: infra(tf): experimental - add versioning to protect against accidental deletes/overwrites" --- terraform/s3.tf | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/terraform/s3.tf b/terraform/s3.tf index 3e0f5b7..edfe0a0 100644 --- a/terraform/s3.tf +++ b/terraform/s3.tf @@ -10,6 +10,13 @@ resource "aws_s3_bucket" "extract_bucket" { } } +resource "aws_s3_bucket_versioning" "extract_bucket_versioning" { + bucket = aws_s3_bucket.extract_bucket.id + versioning_configuration { + status = "Enabled" + } +} + ########################## # TRANSFORM BUCKET SETUP # ########################## @@ -21,6 +28,14 @@ resource "aws_s3_bucket" "transform_bucket" { } } + +resource "aws_s3_bucket_versioning" "transform_bucket_versioning" { + bucket = aws_s3_bucket.transform_bucket.id + versioning_configuration { + status = "Enabled" + } +} + ####################### # LAMBDA BUCKET SETUP # ####################### -- cgit v1.2.3 From 1bbc12702a8fa6d5139440c9d04e5bfabd96581d Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 21:32:32 +0100 Subject: infra(tf): add versioning to lambda_code_bucket --- terraform/s3.tf | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/terraform/s3.tf b/terraform/s3.tf index edfe0a0..d17a4fe 100644 --- a/terraform/s3.tf +++ b/terraform/s3.tf @@ -46,3 +46,10 @@ resource "aws_s3_bucket" "lambda_code_bucket" { Name = "Lambda Bucket" } } + +resource "aws_s3_bucket_versioning" "lambda_bucket_versioning" { + bucket = aws_s3_bucket.lambda_code_bucket.id + versioning_configuration { + status = "Enabled" + } +} -- cgit v1.2.3 From b9a3d9dbaa1eedc25d5f8d12bd2be1a8a3841b42 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 21:39:05 +0100 Subject: docs(tf): improve legibility of events.tf sections --- terraform/events.tf | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/terraform/events.tf b/terraform/events.tf index 263141f..c2efbbc 100644 --- a/terraform/events.tf +++ b/terraform/events.tf @@ -1,3 +1,7 @@ +################# +# Random String # +################# + resource "random_string" "eventbridge_suffix" { length = 8 special = false @@ -16,6 +20,10 @@ resource "random_string" "s3_transform_suffix" { upper = false } +############################# +# EventBridge Configuration # +############################# + resource "aws_cloudwatch_event_rule" "lambda_trigger" { name = "lambda-scheduled-trigger" description = "Schedule to trigger the Lambda function" @@ -41,7 +49,10 @@ resource "aws_lambda_permission" "allow_eventbridge" { } } -# below is step function 1 +######################################## +# S3 Extract Bucket Notification Setup # +######################################## + resource "aws_lambda_permission" "allow_s3_ingestion" { statement_id = "AllowS3InvokeLambdaTransform${random_string.s3_ingestion_suffix.result}" action = "lambda:InvokeFunction" @@ -66,6 +77,10 @@ resource "aws_s3_bucket_notification" "extract_bucket_notification" { depends_on = [aws_lambda_permission.allow_s3_ingestion] } +########################################## +# S3 Transform Bucket Notification Setup # +########################################## + resource "aws_lambda_permission" "allow_s3_transform_bucket" { statement_id = "AllowS3InvokeLambdaTransform${random_string.s3_transform_suffix.result}" action = "lambda:InvokeFunction" -- cgit v1.2.3 From 09b8010a453c99164540981060177fdd2280df7e Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 21:41:31 +0100 Subject: infra(tf): remove repetitive suffix resources in events.tf --- terraform/events.tf | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/terraform/events.tf b/terraform/events.tf index c2efbbc..832a965 100644 --- a/terraform/events.tf +++ b/terraform/events.tf @@ -2,19 +2,7 @@ # Random String # ################# -resource "random_string" "eventbridge_suffix" { - length = 8 - special = false - upper = false -} - -resource "random_string" "s3_ingestion_suffix" { - length = 8 - special = false - upper = false -} - -resource "random_string" "s3_transform_suffix" { +resource "random_string" "suffix" { length = 8 special = false upper = false @@ -38,14 +26,14 @@ resource "aws_cloudwatch_event_target" "extract_lambda_cw_event" { } resource "aws_lambda_permission" "allow_eventbridge" { - statement_id = "AllowExecutionFromEventBridge${random_string.eventbridge_suffix.result}" + statement_id = "AllowExecutionFromEventBridge${random_string.suffix.result}" action = "lambda:InvokeFunction" function_name = aws_lambda_function.extract_lambda.function_name principal = "events.amazonaws.com" source_arn = aws_cloudwatch_event_rule.lambda_trigger.arn lifecycle { - replace_triggered_by = [random_string.eventbridge_suffix] + replace_triggered_by = [random_string.suffix] } } @@ -54,14 +42,14 @@ resource "aws_lambda_permission" "allow_eventbridge" { ######################################## resource "aws_lambda_permission" "allow_s3_ingestion" { - statement_id = "AllowS3InvokeLambdaTransform${random_string.s3_ingestion_suffix.result}" + statement_id = "AllowS3InvokeLambdaTransform${random_string.suffix.result}" action = "lambda:InvokeFunction" function_name = aws_lambda_function.transform_lambda.function_name #replaced lambda name placeholder principal = "s3.amazonaws.com" source_arn = aws_s3_bucket.extract_bucket.arn #replaced bucket name placeholder lifecycle { - replace_triggered_by = [random_string.s3_ingestion_suffix] + replace_triggered_by = [random_string.suffix] } } @@ -82,14 +70,14 @@ resource "aws_s3_bucket_notification" "extract_bucket_notification" { ########################################## resource "aws_lambda_permission" "allow_s3_transform_bucket" { - statement_id = "AllowS3InvokeLambdaTransform${random_string.s3_transform_suffix.result}" + statement_id = "AllowS3InvokeLambdaTransform${random_string.suffix.result}" action = "lambda:InvokeFunction" function_name = aws_lambda_function.transform_lambda.function_name #replaced lambda name placeholder principal = "s3.amazonaws.com" source_arn = aws_s3_bucket.transform_bucket.arn #replaced bucket name placeholder lifecycle { - replace_triggered_by = [random_string.s3_transform_suffix] + replace_triggered_by = [random_string.suffix] } } -- cgit v1.2.3 From 367100c2d118847a775f4eba87a8c9033c872cb9 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 21:44:13 +0100 Subject: docs(tf): remove redundant comments --- terraform/events.tf | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/terraform/events.tf b/terraform/events.tf index 832a965..0113f5f 100644 --- a/terraform/events.tf +++ b/terraform/events.tf @@ -21,7 +21,7 @@ resource "aws_cloudwatch_event_rule" "lambda_trigger" { resource "aws_cloudwatch_event_target" "extract_lambda_cw_event" { rule = aws_cloudwatch_event_rule.lambda_trigger.name target_id = "TargetFunctionV1" - arn = aws_lambda_function.extract_lambda.arn #replaced lambda name placeholder + arn = aws_lambda_function.extract_lambda.arn depends_on = [aws_lambda_permission.allow_eventbridge] } @@ -44,9 +44,9 @@ resource "aws_lambda_permission" "allow_eventbridge" { resource "aws_lambda_permission" "allow_s3_ingestion" { statement_id = "AllowS3InvokeLambdaTransform${random_string.suffix.result}" action = "lambda:InvokeFunction" - function_name = aws_lambda_function.transform_lambda.function_name #replaced lambda name placeholder + function_name = aws_lambda_function.transform_lambda.function_name principal = "s3.amazonaws.com" - source_arn = aws_s3_bucket.extract_bucket.arn #replaced bucket name placeholder + source_arn = aws_s3_bucket.extract_bucket.arn lifecycle { replace_triggered_by = [random_string.suffix] @@ -55,11 +55,11 @@ resource "aws_lambda_permission" "allow_s3_ingestion" { resource "aws_s3_bucket_notification" "extract_bucket_notification" { - bucket = aws_s3_bucket.extract_bucket.id #replaced bucket name placeholder + bucket = aws_s3_bucket.extract_bucket.id lambda_function { events = ["s3:ObjectCreated:*"] - lambda_function_arn = aws_lambda_function.transform_lambda.arn #replaced lambda name placeholder + lambda_function_arn = aws_lambda_function.transform_lambda.arn } depends_on = [aws_lambda_permission.allow_s3_ingestion] @@ -72,9 +72,9 @@ resource "aws_s3_bucket_notification" "extract_bucket_notification" { resource "aws_lambda_permission" "allow_s3_transform_bucket" { statement_id = "AllowS3InvokeLambdaTransform${random_string.suffix.result}" action = "lambda:InvokeFunction" - function_name = aws_lambda_function.transform_lambda.function_name #replaced lambda name placeholder + function_name = aws_lambda_function.transform_lambda.function_name principal = "s3.amazonaws.com" - source_arn = aws_s3_bucket.transform_bucket.arn #replaced bucket name placeholder + source_arn = aws_s3_bucket.transform_bucket.arn lifecycle { replace_triggered_by = [random_string.suffix] @@ -83,11 +83,11 @@ resource "aws_lambda_permission" "allow_s3_transform_bucket" { resource "aws_s3_bucket_notification" "transform_bucket_notification" { - bucket = aws_s3_bucket.transform_bucket.id #replaced bucket name placeholder + bucket = aws_s3_bucket.transform_bucket.id lambda_function { events = ["s3:ObjectCreated:*"] - lambda_function_arn = aws_lambda_function.transform_lambda.arn #replaced lambda name placeholder + lambda_function_arn = aws_lambda_function.transform_lambda.arn } depends_on = [aws_lambda_permission.allow_s3_transform_bucket] -- cgit v1.2.3 From a9fb82f5c96e0ba98d6d3453ce900f2ca22157ec Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 21:48:00 +0100 Subject: infra(tf): remove unused declaration in vars.tf --- terraform/vars.tf | 5 ----- 1 file changed, 5 deletions(-) diff --git a/terraform/vars.tf b/terraform/vars.tf index 1adbcf7..b3e3e47 100644 --- a/terraform/vars.tf +++ b/terraform/vars.tf @@ -33,11 +33,6 @@ variable "project_name" { default = "tt" } -variable "aws_region" { - type = string - default = "eu-west-2" -} - variable "environment" { type = string default = "dev" -- cgit v1.2.3 From c091506dc8e01741f54f9a8d289515c8d5ffbecf Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 21:49:00 +0100 Subject: infra(tf): add version constraint for random in main.tf --- terraform/main.tf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/terraform/main.tf b/terraform/main.tf index 33c760c..ad7b335 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -13,6 +13,10 @@ terraform { source = "hashicorp/archive" version = "~>2.5.0" } + random_string = { + source = "hashicorp/random" + version = "~>3.6.2" + } } backend "s3" { bucket = "bentley-project-secrets" -- cgit v1.2.3 From ce2761b311523a118cdead885ba7fcf1f7a4cd68 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 21:52:15 +0100 Subject: fix(tf): correct random_string to random in main.tf --- terraform/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/main.tf b/terraform/main.tf index ad7b335..6577b70 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -13,7 +13,7 @@ terraform { source = "hashicorp/archive" version = "~>2.5.0" } - random_string = { + random = { source = "hashicorp/random" version = "~>3.6.2" } -- cgit v1.2.3 From 88e71818aaf1bf67e4d2807d22d8122b7bf184f1 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 22:20:21 +0100 Subject: refactor(log): implement logging ancestry - avoid using root logger --- src/extract_lambda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 15fe785..6f841b4 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -8,7 +8,7 @@ from datetime import datetime import re -logger = logging.getLogger() +logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) # DB Exception class -- cgit v1.2.3 From 84b3dea3833ae65d53a1007567ee19c31bf34ee3 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 22:28:31 +0100 Subject: refactor(retrieve_secrets): use aws recommended method for retrieving secrets --- src/extract_lambda.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 6f841b4..1df4c34 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -55,18 +55,21 @@ def lambda_handler(event, context): db.close() -def retrieve_secrets( - sm_client=boto3.client("secretsmanager"), secret_name="bentley-secrets" -): +def retrieve_secrets(): + secret_name = "bentley-secrets" + region_name = "eu-west-2" + + # Create a Secrets Manager client + session = boto3.session.Session() + client = session.client(service_name="secretsmanager", region_name=region_name) + try: - response = sm_client.get_secret_value(SecretId=secret_name) - if "SecretString" in response: - secret = json.loads(response["SecretString"]) - return secret + get_secret_value_response = client.get_secret_value(SecretId=secret_name) except ClientError as e: - logger.error(f"Could not retrieve secrets: {e}") raise e + return get_secret_value_response["SecretString"] + def connect_to_database() -> Connection: try: -- cgit v1.2.3 From 3d4d74aa69db85e3c840b3b73c028f4e9f83d1f7 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 22:29:41 +0100 Subject: refactor(lambda_handler): remove unnecessary else statement --- src/extract_lambda.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 1df4c34..99117a4 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -39,14 +39,13 @@ def lambda_handler(event, context): "statusCode": 200, "body": json.dumps("No changes detected, no CSV files were uploaded."), } - else: - return { - "statusCode": 200, - "body": json.dumps( - f"""CSV files processed for {', '.join(any_changes['updated'])} and uploaded successfully.{ - 'The following tables were not updated: '+', '.join(any_changes['no change']) if any_changes['no change'] else ''}""" - ), - } + return { + "statusCode": 200, + "body": json.dumps( + f"""CSV files processed for {', '.join(any_changes['updated'])} and uploaded successfully.{ + 'The following tables were not updated: '+', '.join(any_changes['no change']) if any_changes['no change'] else ''}""" + ), + } except Exception as e: logger.error(f"Error: {e}") return {"statusCode": 500, "body": json.dumps("Internal server error.")} -- cgit v1.2.3 From 4699b3506307cb8556a7cc5f12fbe4df7a5c9a6b Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 22:31:58 +0100 Subject: refactor(retrieve_secrets): improve error handling when retrieving secrets --- src/extract_lambda.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 99117a4..63a80ce 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -66,6 +66,9 @@ def retrieve_secrets(): get_secret_value_response = client.get_secret_value(SecretId=secret_name) except ClientError as e: raise e + except KeyError: + logger.error(f"Secret {secret_name} does not contain a SecretString") + raise ValueError(f"Secret {secret_name} does not contain a SecretString") return get_secret_value_response["SecretString"] -- cgit v1.2.3 From 8353621c862e75d1573ff8338852aa7d54d5d2e8 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 22:36:37 +0100 Subject: refactor(retrieve_secrets): add logging for ClientError --- src/extract_lambda.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 63a80ce..485c021 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -65,6 +65,7 @@ def retrieve_secrets(): try: get_secret_value_response = client.get_secret_value(SecretId=secret_name) except ClientError as e: + logger.error(f"Failed to retrieve secret {secret_name}: {str(e)}") raise e except KeyError: logger.error(f"Secret {secret_name} does not contain a SecretString") -- cgit v1.2.3 From bcbadd508dbc1a53864e64cb1e2eccce53daa187 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 22:37:43 +0100 Subject: chore: reorganise imports in extract_lambda --- src/extract_lambda.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 485c021..8353481 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -1,12 +1,12 @@ -from pg8000.native import Connection, InterfaceError, identifier -import boto3 import csv -from botocore.exceptions import ClientError -import logging import json -from datetime import datetime +import logging import re +from datetime import datetime +import boto3 +from botocore.exceptions import ClientError +from pg8000.native import Connection, InterfaceError, identifier logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -- cgit v1.2.3 From a8ce060732ed3064696f2d6c5459ffa176fd02f7 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 23:02:56 +0100 Subject: fix(tf): lambda permissions should be created before destroyed --- terraform/events.tf | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/terraform/events.tf b/terraform/events.tf index 0113f5f..9885a86 100644 --- a/terraform/events.tf +++ b/terraform/events.tf @@ -33,7 +33,8 @@ resource "aws_lambda_permission" "allow_eventbridge" { source_arn = aws_cloudwatch_event_rule.lambda_trigger.arn lifecycle { - replace_triggered_by = [random_string.suffix] + create_before_destroy = true + replace_triggered_by = [random_string.suffix] } } @@ -49,7 +50,8 @@ resource "aws_lambda_permission" "allow_s3_ingestion" { source_arn = aws_s3_bucket.extract_bucket.arn lifecycle { - replace_triggered_by = [random_string.suffix] + create_before_destroy = true + replace_triggered_by = [random_string.suffix] } } @@ -77,7 +79,8 @@ resource "aws_lambda_permission" "allow_s3_transform_bucket" { source_arn = aws_s3_bucket.transform_bucket.arn lifecycle { - replace_triggered_by = [random_string.suffix] + create_before_destroy = true + replace_triggered_by = [random_string.suffix] } } -- cgit v1.2.3 From b8574d4c4bf262a8034d21b770fd4287022c2648 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 23:07:28 +0100 Subject: fix(tf): re-add separate random_string suffixes in events.tf --- terraform/events.tf | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/terraform/events.tf b/terraform/events.tf index 9885a86..53ae10a 100644 --- a/terraform/events.tf +++ b/terraform/events.tf @@ -2,7 +2,19 @@ # Random String # ################# -resource "random_string" "suffix" { +resource "random_string" "eventbridge_suffix" { + length = 8 + special = false + upper = false +} + +resource "random_string" "s3_ingestion_suffix" { + length = 8 + special = false + upper = false +} + +resource "random_string" "s3_transform_suffix" { length = 8 special = false upper = false @@ -26,7 +38,7 @@ resource "aws_cloudwatch_event_target" "extract_lambda_cw_event" { } resource "aws_lambda_permission" "allow_eventbridge" { - statement_id = "AllowExecutionFromEventBridge${random_string.suffix.result}" + statement_id = "AllowExecutionFromEventBridge${random_string.eventbridge_suffix.result}" action = "lambda:InvokeFunction" function_name = aws_lambda_function.extract_lambda.function_name principal = "events.amazonaws.com" @@ -34,7 +46,7 @@ resource "aws_lambda_permission" "allow_eventbridge" { lifecycle { create_before_destroy = true - replace_triggered_by = [random_string.suffix] + replace_triggered_by = [random_string.eventbridge_suffix] } } @@ -43,7 +55,7 @@ resource "aws_lambda_permission" "allow_eventbridge" { ######################################## resource "aws_lambda_permission" "allow_s3_ingestion" { - statement_id = "AllowS3InvokeLambdaTransform${random_string.suffix.result}" + statement_id = "AllowS3InvokeLambdaTransform${random_string.s3_ingestion_suffix.result}" action = "lambda:InvokeFunction" function_name = aws_lambda_function.transform_lambda.function_name principal = "s3.amazonaws.com" @@ -51,7 +63,7 @@ resource "aws_lambda_permission" "allow_s3_ingestion" { lifecycle { create_before_destroy = true - replace_triggered_by = [random_string.suffix] + replace_triggered_by = [random_string.s3_ingestion_suffix] } } @@ -72,7 +84,7 @@ resource "aws_s3_bucket_notification" "extract_bucket_notification" { ########################################## resource "aws_lambda_permission" "allow_s3_transform_bucket" { - statement_id = "AllowS3InvokeLambdaTransform${random_string.suffix.result}" + statement_id = "AllowS3InvokeLambdaTransform${random_string.s3_transform_suffix.result}" action = "lambda:InvokeFunction" function_name = aws_lambda_function.transform_lambda.function_name principal = "s3.amazonaws.com" @@ -80,7 +92,7 @@ resource "aws_lambda_permission" "allow_s3_transform_bucket" { lifecycle { create_before_destroy = true - replace_triggered_by = [random_string.suffix] + replace_triggered_by = [random_string.s3_transform_suffix] } } -- cgit v1.2.3 From caed81dc699b9b4105da2b8924310f1a370217c7 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 23:13:39 +0100 Subject: refactor: add timestamp function in extract_lambda.py --- src/extract_lambda.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 8353481..ad3c970 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -129,6 +129,16 @@ def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client("s3 return existing_files +def get_latest_timestamp(existing_files): + all_datetimes = [] + for file_name in existing_files.keys(): + match = re.search(r"\/(.+/).+_(.+)\.csv", file_name) + if match: + datetime_str = "".join(match.group(1, 2)) + all_datetimes.append(datetime.strptime(datetime_str, "%Y/%m/%d/%H:%M:%S")) + return max(all_datetimes) if all_datetimes else datetime.min + + def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): """Creates a list of the tables from a database query and then selects everything from each table in individual queries @@ -137,22 +147,17 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): to files, or new tables/files it uploads them to the s3 bucket """ load_status = {"updated": [], "no change": []} - # Retrieving the latest file timestamp from S3 extract bucket - all_datetimes = [] - for file_names in existing_files.keys(): - datetime_str_on_s3 = "".join( - re.search(r"\/(.+/).+_(.+)\.csv", file_names).group(1, 2) - ) - all_datetimes.append(datetime.strptime(datetime_str_on_s3, "%Y/%m/%d/%H:%M:%S")) - latest_timestamp = max(all_datetimes) + latest_timestamp = get_latest_timestamp(existing_files) - # Iterating through tables on the database and retrieving only latest changes vs previous file load tables = db.run( """ - SELECT table_name - FROM information_schema.tables - WHERE table_schema='public' AND table_type='BASE TABLE';""" + SELECT table_name + FROM information_schema.tables + WHERE table_schema='public' + AND table_type='BASE TABLE'; + """ ) + for table in tables: print(tables) table_name = table[0] -- cgit v1.2.3 From 610d23e7ed0f39e5ecb0dd25c3a1e3cba20d662e Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 23:26:58 +0100 Subject: refactor: remove print statements in process_and_upload_tables --- src/extract_lambda.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index ad3c970..7c6c3d1 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -159,13 +159,11 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): ) for table in tables: - print(tables) table_name = table[0] rows = db.run( f"SELECT * FROM {identifier(table_name)} WHERE last_updated >= :latest;", latest={datetime.strftime(latest_timestamp, "%Y-%m-%d %H:%M:%S")}, ) - print("rows", rows) # Creating a temporary file path and writing the column name to it followed by each row of data if rows: csv_file_path = f"/tmp/{table_name}.csv" -- cgit v1.2.3 From 5be3b130170c82360ff9715f5c09b9e815fc16f4 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 23:32:25 +0100 Subject: feat: use buffers for s3 upload instead of csv files --- src/extract_lambda.py | 50 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 7c6c3d1..f38e24a 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -3,6 +3,7 @@ import json import logging import re from datetime import datetime +from io import StringIO import boto3 from botocore.exceptions import ClientError @@ -139,6 +140,26 @@ def get_latest_timestamp(existing_files): return max(all_datetimes) if all_datetimes else datetime.min +def stream_to_s3(table_name, rows, column_names, s3_client, bucket_name, s3_key): + csv_buffer = StringIO() + csv_writer = csv.writer(csv_buffer) + + csv_writer.writerow(column_names) + + for row in rows: + csv_writer.writerow(row) + + if csv_buffer.tell() > 5 * 1024 * 1024: + csv_buffer.seek(0) + s3_client.upload_fileobj(csv_buffer, bucket_name, s3_key) + csv_buffer.truncate(0) + csv_buffer.seek(0) + + if csv_buffer.tell() > 0: + csv_buffer.seek(0) + s3_client.upload_fileobj(csv_buffer, bucket_name, s3_key) + + def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): """Creates a list of the tables from a database query and then selects everything from each table in individual queries @@ -164,29 +185,24 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): f"SELECT * FROM {identifier(table_name)} WHERE last_updated >= :latest;", latest={datetime.strftime(latest_timestamp, "%Y-%m-%d %H:%M:%S")}, ) - # Creating a temporary file path and writing the column name to it followed by each row of data if rows: - csv_file_path = f"/tmp/{table_name}.csv" - with open(csv_file_path, "w", newline="") as file: - writer = csv.writer(file) - # column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] - column_names = [ - col_name[0] - for col_name in db.run( - """SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS - WHERE table_name = :table ;""", - table=table_name, - ) - ] - writer.writerow(column_names) - writer.writerows(rows) + column_names = [ + col_name[0] + for col_name in db.run( + """SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS + WHERE table_name = :table ;""", + table=table_name, + ) + ] + s3_key = datetime.strftime( datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv" ) - # Writing the new file to S3 extract bucket: try: - client.upload_file(csv_file_path, extract_bucket(), s3_key) + stream_to_s3( + table_name, rows, column_names, client, extract_bucket(), s3_key + ) load_status["updated"].append(table_name) logger.info(f"Uploaded {s3_key} to S3.") except ClientError as e: -- cgit v1.2.3 From 3e80acb28eeeb0eaff97c2363124a8c6e95bcb13 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Mon, 19 Aug 2024 23:44:52 +0100 Subject: refactor: optimise s3 streaming & file naming --- src/extract_lambda.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index f38e24a..8575b08 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -149,15 +149,9 @@ def stream_to_s3(table_name, rows, column_names, s3_client, bucket_name, s3_key) for row in rows: csv_writer.writerow(row) - if csv_buffer.tell() > 5 * 1024 * 1024: - csv_buffer.seek(0) - s3_client.upload_fileobj(csv_buffer, bucket_name, s3_key) - csv_buffer.truncate(0) - csv_buffer.seek(0) + csv_buffer.seek(0) - if csv_buffer.tell() > 0: - csv_buffer.seek(0) - s3_client.upload_fileobj(csv_buffer, bucket_name, s3_key) + s3_client.upload_fileobj(csv_buffer, bucket_name, s3_key) def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): @@ -190,13 +184,14 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): col_name[0] for col_name in db.run( """SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS - WHERE table_name = :table ;""", + WHERE table_name = :table ;""", table=table_name, ) ] - s3_key = datetime.strftime( - datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv" + s3_key = ( + f"{table_name}/{datetime.now().strftime('%Y/%m/%d')}/" + f"{table_name}_{datetime.now().strftime('%H:%M:%S')}.csv" ) try: -- cgit v1.2.3 From 7c77382fdaf236247a35f35810d66a86923156dd Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 20 Aug 2024 00:10:40 +0100 Subject: fix(): update expected response message --- tests/test_extract_lambda.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index 3931cfc..9362a6c 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -72,7 +72,11 @@ class TestLambdaHandler: ] with patch("src.extract_lambda.connect_to_database", return_value=mock_db): mock_process_and_upload_tables = mocker.patch( - "src.extract_lambda.process_and_upload_tables", return_value=mock_db + "src.extract_lambda.process_and_upload_tables", + return_value={ + "updated": ["Fruits"], + "no change": ["Vegetable", "Berry"], + }, ) mock_list_existing_s3_files = mocker.patch( "src.extract_lambda.list_existing_s3_files", return_value={} @@ -81,9 +85,9 @@ class TestLambdaHandler: context = {} response = lambda_handler(event, context) assert response["statusCode"] == 200 - assert ( - json.loads(response["body"]) - == "CSV files processed and uploaded successfully." + assert json.loads(response["body"]) == ( + "CSV files processed for Fruits and uploaded successfully." + "The following tables were not updated: Vegetable, Berry" ) mock_list_existing_s3_files.assert_called_once() mock_process_and_upload_tables.assert_called_once_with(mock_db, {}) -- cgit v1.2.3 From bf7aab5cdbf2007824f0fb2bff2de5a4fa8196ba Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 20 Aug 2024 00:11:37 +0100 Subject: chore(tests): rename lambda_handler class test functions --- tests/test_extract_lambda.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index 9362a6c..b9e3a4b 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -58,7 +58,7 @@ def s3_mock_bucket(s3_client): class TestLambdaHandler: - def test_lambda_handler_files_processed_and_uploaded_successfully(self, mocker): + def test_files_processed_and_uploaded_successfully(self, mocker): mock_db = MagicMock() mock_db.run.side_effect = [ [["Fruits"]], @@ -93,7 +93,7 @@ class TestLambdaHandler: mock_process_and_upload_tables.assert_called_once_with(mock_db, {}) mock_db.close.assert_called_once() - def test_lambda_handler_no_changes_detected_no_files_uploaded(self, mocker): + def test_no_changes_detected_no_files_uploaded(self, mocker): mock_db = MagicMock() mock_db.run.side_effect = [ [["Fruits"]], @@ -125,7 +125,7 @@ class TestLambdaHandler: mock_process_and_upload_tables.assert_called_once_with(mock_db, {}) mock_db.close.assert_called_once() - def test_lambda_handler_exception_error(self, mocker): + def test_exception_error(self, mocker): with patch( "src.extract_lambda.connect_to_database", side_effect=Exception("Database connection error"), -- cgit v1.2.3 From 32175a3b4387a8218b4e21561173445fd5b5df1d Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 20 Aug 2024 00:13:45 +0100 Subject: fix(): update expected response message for second test --- tests/test_extract_lambda.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index b9e3a4b..3d15927 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -108,7 +108,8 @@ class TestLambdaHandler: with patch("src.extract_lambda.connect_to_database", return_value=mock_db): mock_process_and_upload_tables = mocker.patch( - "src.extract_lambda.process_and_upload_tables", return_value=False + "src.extract_lambda.process_and_upload_tables", + return_value={"updated": [], "no change": ["Fruits"]}, ) mock_list_existing_s3_files = mocker.patch( "src.extract_lambda.list_existing_s3_files", return_value={} -- cgit v1.2.3 From 640b0685cd795c03b571b3ca26fc9030b86c4f99 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 20 Aug 2024 00:18:16 +0100 Subject: fix(extract_lambda): fix UnboundLocalError when db is called before it is assigned a value --- src/extract_lambda.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 8575b08..7efaac0 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -29,6 +29,7 @@ def lambda_handler(event, context): and converts all tables to CSV and if any of those tables do not exist in, or are different to the ones in s3, it uploads them it uses 3 helper functions to achieve these 3 functionalities """ + db = None try: db = connect_to_database() existing_files = list_existing_s3_files() -- cgit v1.2.3 From 746f4206b2f30126c3c09ac11a2d49be3259fe6f Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 20 Aug 2024 00:42:54 +0100 Subject: infra(tf): add secrets manager permissions I feel like what I've done is bad but we'll find out soon. --- terraform/iam.tf | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/terraform/iam.tf b/terraform/iam.tf index 7585ff8..a36cfdf 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -169,3 +169,30 @@ resource "aws_iam_role_policy_attachment" "cloudwatch_events_attachment" { role = aws_iam_role.multi_service_role.name policy_arn = aws_iam_policy.cloudwatch_events_policy.arn } + +######################### +# SECRETS MANAGER SETUP # +######################### + +# Policy Doc +data "aws_iam_policy_document" "secrets_manager_policy_doc" { + statement { + effect = "Allow" + actions = [ + "secretsmanager:GetSecretValue" + ] + resources = [] + } +} + +# SM Policy Resource +resource "aws_iam_policy" "secrets_manager_policy" { + name = "secrets_manager_policy" + policy = data.aws_iam_policy_document.secrets_manager_policy_doc.json +} + +# Attach SM Policy to Role +resource "aws_iam_role_policy_attachment" "secrets_manager_attachment" { + role = aws_iam_role.multi_service_role.name + policy_arn = aws_iam_policy.secrets_manager_policy.arn +} -- cgit v1.2.3 From 2045888e1ae497444c58347096547f0475bba7a1 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 20 Aug 2024 00:51:11 +0100 Subject: infra(tf): add resource access for secrets-manager policy doc --- terraform/iam.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/iam.tf b/terraform/iam.tf index a36cfdf..a8054ca 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -181,7 +181,7 @@ data "aws_iam_policy_document" "secrets_manager_policy_doc" { actions = [ "secretsmanager:GetSecretValue" ] - resources = [] + resources = ["arn:aws:secretsmanager:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:secret:bentley-secrets-Na0yc8"] } } -- cgit v1.2.3 From d34ad9649648c178ac24b58832982f5c37aca48e Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 20 Aug 2024 01:03:34 +0100 Subject: fix(extract_lambda): parse secrets string as json dict to access secret values --- src/extract_lambda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 7efaac0..9de6214 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -78,7 +78,7 @@ def retrieve_secrets(): def connect_to_database() -> Connection: try: - secrets = retrieve_secrets() + secrets = json.loads(retrieve_secrets()) host = secrets["host"] port = secrets["port"] user = secrets["user"] -- cgit v1.2.3 From ae57535d9f201d6fd749d4286551884d3c86fd60 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 20 Aug 2024 10:26:48 +0100 Subject: infra(tf): add missing ListObjectsV2 permission --- terraform/iam.tf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/terraform/iam.tf b/terraform/iam.tf index a8054ca..3ac8c45 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -40,7 +40,8 @@ data "aws_iam_policy_document" "s3_data_policy_doc" { "s3:PutObjectRetention", "s3:PutObjectTagging", "s3:PutObjectAcl", - "s3:ListObjects" + "s3:ListObjects", + "s3:ListObjectsV2" ] resources = [ "${aws_s3_bucket.extract_bucket.arn}/*", -- cgit v1.2.3 From e25bee6c1c9db8edaf3197f0dc48fa3c63e61744 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 20 Aug 2024 11:01:55 +0100 Subject: feat: revert s3 streaming to previous implementation for uploading --- src/extract_lambda.py | 56 +++++++++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 31 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 7efaac0..4921034 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -49,7 +49,7 @@ def lambda_handler(event, context): ), } except Exception as e: - logger.error(f"Error: {e}") + logger.error(f"Error: {e}", exc_info=True) return {"statusCode": 500, "body": json.dumps("Internal server error.")} finally: if db: @@ -78,7 +78,7 @@ def retrieve_secrets(): def connect_to_database() -> Connection: try: - secrets = retrieve_secrets() + secrets = json.loads(retrieve_secrets()) host = secrets["host"] port = secrets["port"] user = secrets["user"] @@ -141,20 +141,6 @@ def get_latest_timestamp(existing_files): return max(all_datetimes) if all_datetimes else datetime.min -def stream_to_s3(table_name, rows, column_names, s3_client, bucket_name, s3_key): - csv_buffer = StringIO() - csv_writer = csv.writer(csv_buffer) - - csv_writer.writerow(column_names) - - for row in rows: - csv_writer.writerow(row) - - csv_buffer.seek(0) - - s3_client.upload_fileobj(csv_buffer, bucket_name, s3_key) - - def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): """Creates a list of the tables from a database query and then selects everything from each table in individual queries @@ -180,25 +166,29 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): f"SELECT * FROM {identifier(table_name)} WHERE last_updated >= :latest;", latest={datetime.strftime(latest_timestamp, "%Y-%m-%d %H:%M:%S")}, ) + # Creating a temporary file path and writing the column name to it followed by each row of data if rows: - column_names = [ - col_name[0] - for col_name in db.run( - """SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS - WHERE table_name = :table ;""", - table=table_name, - ) - ] - - s3_key = ( - f"{table_name}/{datetime.now().strftime('%Y/%m/%d')}/" - f"{table_name}_{datetime.now().strftime('%H:%M:%S')}.csv" + csv_file_path = f"/tmp/{table_name}.csv" + with open(csv_file_path, "w", newline="") as file: + writer = csv.writer(file) + # column_names = [desc["name"] for desc in db.columns(f"SELECT * FROM {table_name};")] + column_names = [ + col_name[0] + for col_name in db.run( + """SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS + WHERE table_name = :table ;""", + table=table_name, + ) + ] + writer.writerow(column_names) + writer.writerows(rows) + s3_key = datetime.strftime( + datetime.today(), f"{table_name}/%Y/%m/%d/{table_name}_%H:%M:%S.csv" ) + # Writing the new file to S3 extract bucket: try: - stream_to_s3( - table_name, rows, column_names, client, extract_bucket(), s3_key - ) + client.upload_file(csv_file_path, extract_bucket(), s3_key) load_status["updated"].append(table_name) logger.info(f"Uploaded {s3_key} to S3.") except ClientError as e: @@ -207,3 +197,7 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): load_status["no change"].append(table_name) logger.info(f"No new data") return load_status + + +if __name__ == "__main__": + lambda_handler(None, None) -- cgit v1.2.3 From 5211751b69a894874945e3a916c33781a327ab10 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 20 Aug 2024 11:26:26 +0100 Subject: feat: conditional logic for if bucket is empty --- src/extract_lambda.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 4921034..6216446 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -124,6 +124,7 @@ def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client("s3 logger.error(f"Error retrieving S3 object {s3_key}: {e}") else: logger.error("The bucket is empty") + return None except ClientError as e: logger.error(f"Error listing S3 objects: {e}") @@ -132,13 +133,18 @@ def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client("s3 def get_latest_timestamp(existing_files): - all_datetimes = [] - for file_name in existing_files.keys(): - match = re.search(r"\/(.+/).+_(.+)\.csv", file_name) - if match: - datetime_str = "".join(match.group(1, 2)) - all_datetimes.append(datetime.strptime(datetime_str, "%Y/%m/%d/%H:%M:%S")) - return max(all_datetimes) if all_datetimes else datetime.min + if existing_files: + all_datetimes = [] + for file_name in existing_files.keys(): + match = re.search(r"\/(.+/).+_(.+)\.csv", file_name) + if match: + datetime_str = "".join(match.group(1, 2)) + all_datetimes.append( + datetime.strptime(datetime_str, "%Y/%m/%d/%H:%M:%S") + ) + return max(all_datetimes) if all_datetimes else datetime.min + + return existing_files def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): @@ -163,8 +169,16 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): for table in tables: table_name = table[0] rows = db.run( - f"SELECT * FROM {identifier(table_name)} WHERE last_updated >= :latest;", - latest={datetime.strftime(latest_timestamp, "%Y-%m-%d %H:%M:%S")}, + f""" + SELECT * FROM {identifier(table_name)} + WHERE last_updated >= :latest; + """, + latest={ + datetime.strftime( + latest_timestamp if latest_timestamp else datetime(1990, 1, 1), + "%Y-%m-%d %H:%M:%S", + ) + }, ) # Creating a temporary file path and writing the column name to it followed by each row of data if rows: -- cgit v1.2.3 From dc3a7e74ddf549dad05745c64201aaf0d3402213 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 20 Aug 2024 11:31:25 +0100 Subject: feat: add advanced logging --- src/extract_lambda.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 6216446..9daf662 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -10,8 +10,12 @@ from botocore.exceptions import ClientError from pg8000.native import Connection, InterfaceError, identifier logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - +logging.basicConfig( + format="{asctime} - {levelname} - {message}", + style="{", + datefmt="%Y-%m-%d %H:%M", + level=logging.INFO, +) # DB Exception class @@ -168,11 +172,13 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): for table in tables: table_name = table[0] - rows = db.run( - f""" + base_query = f""" SELECT * FROM {identifier(table_name)} WHERE last_updated >= :latest; - """, + """ + logger.info(f"Processing table: {table_name}") + rows = db.run( + base_query, latest={ datetime.strftime( latest_timestamp if latest_timestamp else datetime(1990, 1, 1), -- cgit v1.2.3 From 35397e8bad42a8c507d1fb13007c6da2f947e851 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 20 Aug 2024 11:44:30 +0100 Subject: feat: add additional logging and exclude unnecessary table --- src/extract_lambda.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index 9daf662..fe22192 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -165,7 +165,7 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): """ SELECT table_name FROM information_schema.tables - WHERE table_schema='public' + WHERE table_schema='public' AND table_name != '_prisma_migrations' AND table_type='BASE TABLE'; """ ) @@ -176,16 +176,18 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): SELECT * FROM {identifier(table_name)} WHERE last_updated >= :latest; """ - logger.info(f"Processing table: {table_name}") - rows = db.run( - base_query, - latest={ + latest = ( + { datetime.strftime( latest_timestamp if latest_timestamp else datetime(1990, 1, 1), "%Y-%m-%d %H:%M:%S", ) }, ) + logger.info(f"Processing table: {table_name}") + logger.info(f"Latest timestamp: {latest[0]}") + rows = db.run(base_query, latest=latest) + logger.info(f"Rows: {rows}") # Creating a temporary file path and writing the column name to it followed by each row of data if rows: csv_file_path = f"/tmp/{table_name}.csv" -- cgit v1.2.3 From be911e22a964bdf7d5a4421cde7d7c6df447ed5c Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 20 Aug 2024 11:49:59 +0100 Subject: refactor: change rows output to debug logger output --- src/extract_lambda.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index fe22192..e9f438b 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -16,7 +16,6 @@ logging.basicConfig( datefmt="%Y-%m-%d %H:%M", level=logging.INFO, ) -# DB Exception class class DBConnectionException(Exception): @@ -187,7 +186,7 @@ def process_and_upload_tables(db, existing_files, client=boto3.client("s3")): logger.info(f"Processing table: {table_name}") logger.info(f"Latest timestamp: {latest[0]}") rows = db.run(base_query, latest=latest) - logger.info(f"Rows: {rows}") + logger.debug(f"Rows: {rows}") # Creating a temporary file path and writing the column name to it followed by each row of data if rows: csv_file_path = f"/tmp/{table_name}.csv" -- cgit v1.2.3 From e788a90307831d968fcac51dc5d70d356a5a5f63 Mon Sep 17 00:00:00 2001 From: lian-manonog Date: Tue, 20 Aug 2024 12:05:56 +0100 Subject: Complete: completed testing for extract bucket - all passing --- tests/test_extract_lambda.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index 3d15927..3cd2405 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -15,6 +15,7 @@ from src.extract_lambda import ( lambda_handler, process_and_upload_tables, retrieve_secrets, + extract_bucket ) @@ -146,6 +147,31 @@ class TestLambdaHandler: mock_process_and_upload_tables.assert_not_called() +class TestExtractBucket: + def test_extract_bucket_returns_bucket_name(self, s3_client, s3_mock_bucket): + result = extract_bucket(s3_client) + assert result == "extract_bucket" + + def test_bucket_returns_first_bucket(self, s3_client): + bucket1 = s3_client.create_bucket( + Bucket='bucket1', + CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, + ) + result = extract_bucket(s3_client) + assert result == "extract_bucket" + + def test_returns_index_error_if_no_buckets(self, s3_client): + s3_client.delete_bucket( + Bucket="extract_bucket" + ) + s3_client.delete_bucket( + Bucket="bucket1" + ) + + with pytest.raises(IndexError, match="list index out of range"): + extract_bucket(s3_client) + + class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): logger = logging.getLogger() @@ -165,7 +191,6 @@ class TestListExistingS3Files: class TestConnectToDatabase: - # had mock_config in param def test_connect_to_database(mock_conn, mock_config): with patch("src.extract_lambda.Connection", autospec=True) as mock_conn: connect_to_database() @@ -187,7 +212,7 @@ class TestConnectToDatabase: class TestProcessAndUploadTables: - def test_error_process_and_upload_tables(mock_conn, s3_client, caplog): + def test_error_process_and_upload_tables(self, mock_conn, s3_client, caplog): caplog.set_level(logging.INFO) # Mock return values for database queries -- cgit v1.2.3 From 346aadfbf2208a0660ffc09959a91fc2f7b48c79 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 20 Aug 2024 12:07:17 +0100 Subject: infra(tf): force-destroy buckets --- terraform/s3.tf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/terraform/s3.tf b/terraform/s3.tf index d17a4fe..14e8835 100644 --- a/terraform/s3.tf +++ b/terraform/s3.tf @@ -4,7 +4,7 @@ resource "aws_s3_bucket" "extract_bucket" { bucket_prefix = "${var.s3_extract_bucket_name}-" - + force_destroy = true tags = { Name = "Ingestion Bucket" } @@ -23,6 +23,7 @@ resource "aws_s3_bucket_versioning" "extract_bucket_versioning" { resource "aws_s3_bucket" "transform_bucket" { bucket_prefix = "${var.s3_transform_bucket_name}-" + force_destroy = true tags = { Name = "Transform Bucket" } @@ -42,6 +43,7 @@ resource "aws_s3_bucket_versioning" "transform_bucket_versioning" { resource "aws_s3_bucket" "lambda_code_bucket" { bucket_prefix = "${var.s3_code_bucket_name}-" + force_destroy = true tags = { Name = "Lambda Bucket" } -- cgit v1.2.3 From 0870dc49ddbd6024dddb289909487a15c26a3383 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:08:24 +0000 Subject: style: format code with Autopep8, Black and Ruff Formatter This commit fixes the style issues introduced in e788a90 according to the output from Autopep8, Black and Ruff Formatter. Details: https://github.com/ajschofield/de-project-bentley/pull/72 --- tests/test_extract_lambda.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tests/test_extract_lambda.py b/tests/test_extract_lambda.py index 3cd2405..548ce67 100644 --- a/tests/test_extract_lambda.py +++ b/tests/test_extract_lambda.py @@ -15,7 +15,7 @@ from src.extract_lambda import ( lambda_handler, process_and_upload_tables, retrieve_secrets, - extract_bucket + extract_bucket, ) @@ -154,24 +154,20 @@ class TestExtractBucket: def test_bucket_returns_first_bucket(self, s3_client): bucket1 = s3_client.create_bucket( - Bucket='bucket1', + Bucket="bucket1", CreateBucketConfiguration={"LocationConstraint": "eu-west-2"}, ) result = extract_bucket(s3_client) assert result == "extract_bucket" def test_returns_index_error_if_no_buckets(self, s3_client): - s3_client.delete_bucket( - Bucket="extract_bucket" - ) - s3_client.delete_bucket( - Bucket="bucket1" - ) + s3_client.delete_bucket(Bucket="extract_bucket") + s3_client.delete_bucket(Bucket="bucket1") with pytest.raises(IndexError, match="list index out of range"): extract_bucket(s3_client) - + class TestListExistingS3Files: def test_error_if_no_bucket(self, s3_client, caplog): logger = logging.getLogger() -- cgit v1.2.3 From 2a914add8391f345ee1096b9deb729c05d3e06c3 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 20 Aug 2024 15:15:02 +0100 Subject: feat: add more logging for debugging --- src/extract_lambda.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/extract_lambda.py b/src/extract_lambda.py index e9f438b..24f0981 100644 --- a/src/extract_lambda.py +++ b/src/extract_lambda.py @@ -10,13 +10,16 @@ from botocore.exceptions import ClientError from pg8000.native import Connection, InterfaceError, identifier logger = logging.getLogger(__name__) + logging.basicConfig( format="{asctime} - {levelname} - {message}", style="{", datefmt="%Y-%m-%d %H:%M", - level=logging.INFO, + level=logging.DEBUG, ) +logging.getLogger("botocore").setLevel(logging.WARNING) + class DBConnectionException(Exception): """Wraps pg8000.native Error or DatabaseError.""" @@ -110,7 +113,7 @@ def list_existing_s3_files(bucket_name=extract_bucket(), client=boto3.client("s3 results of listing the contents of the s3 bucket, then returns the populated dictionary """ - + logging.info("Listing existing S3 files") existing_files = {} try: -- cgit v1.2.3 From 5493cdc71da4730c4e388d9718f278bc2f14badf Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 20 Aug 2024 15:15:28 +0100 Subject: infra(tf): add ListBucket and GetObject permissions --- terraform/iam.tf | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/terraform/iam.tf b/terraform/iam.tf index 3ac8c45..3d62b69 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -41,7 +41,8 @@ data "aws_iam_policy_document" "s3_data_policy_doc" { "s3:PutObjectTagging", "s3:PutObjectAcl", "s3:ListObjects", - "s3:ListObjectsV2" + "s3:ListObjectsV2", + "s3:GetObject" ] resources = [ "${aws_s3_bucket.extract_bucket.arn}/*", @@ -53,8 +54,10 @@ data "aws_iam_policy_document" "s3_data_policy_doc" { statement { effect = "Allow" actions = [ - "s3:ListBuckets", - "s3:ListAllMyBuckets" + "s3:ListBucket", + "s3:ListAllMyBuckets", + "s3:ListObjectsV2", + "s3:ListObjects" ] resources = [ "arn:aws:s3:::*", -- cgit v1.2.3 From 53686e2e466bc38f65da15ec617b43e43a1af9f7 Mon Sep 17 00:00:00 2001 From: Alex Schofield Date: Tue, 20 Aug 2024 15:25:13 +0100 Subject: chore: tidy-up repository & remove unused files --- Makefile | 80 -------------------------------------------------- src/secrets_manager.py | 49 ------------------------------- test.py | 0 3 files changed, 129 deletions(-) delete mode 100644 Makefile delete mode 100644 src/secrets_manager.py delete mode 100644 test.py diff --git a/Makefile b/Makefile deleted file mode 100644 index 077cd98..0000000 --- a/Makefile +++ /dev/null @@ -1,80 +0,0 @@ -############################################## -# # -# MAKEFILE TO BUILD THE PROJECT # -# # -############################################## - -PROJECT_NAME = de-project-bentley -REGION = eu-west-2 -PYTHON_INTERPRETER = python -WD=$(shell pwd) -PYTHONPATH=${WD} -SHELL := /bin/bash -PROFILE = default -PIP:=pip - -## PYTHON INTERPRETER ENVIRONMENT -create-environment: - @echo ">>> About to create environment: $(PROJECT_NAME)..." - @echo ">>> check python3 version" - ( \ - $(PYTHON_INTERPRETER) --version; \ - ) - @echo ">>> Setting up VirtualEnv." - ( \ - $(PIP) install -q virtualenv virtualenvwrapper; \ - virtualenv venv --python=$(PYTHON_INTERPRETER); \ - ) - -ACTIVATE_ENV := source venv/bin/activate - -# Execute python related functionalities from within the project's environment -define execute_in_env - $(ACTIVATE_ENV) && $1 -endef - -## Build the environment requirements -requirements: create-environment - $(call execute_in_env, $(PIP) install -r ./requirements.txt) - -# Set Up -## Install bandit -bandit: - $(call execute_in_env, $(PIP) install bandit) - -## Install safety -safety: - $(call execute_in_env, $(PIP) install safety) - -## Install black -black: - $(call execute_in_env, $(PIP) install black) - -## Install coverage -coverage: - $(call execute_in_env, $(PIP) install coverage) - -## Set up dev requirements (bandit, safety, black) -dev-setup: bandit safety black coverage - -# Build / Run - -## Run the security test (bandit + safety) -security-test: - $(call execute_in_env, safety check -r ./requirements.txt) - $(call execute_in_env, bandit -lll */*.py *c/*/*.py) - -## Run the black code check -run-black: - $(call execute_in_env, black ./src/*/*.py ./test/*/*.py) - -## Run the unit tests -unit-test: - $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest -v) - -## Run the coverage check -check-coverage: - $(call execute_in_env, PYTHONPATH=${PYTHONPATH} pytest --cov=src test/) - -## Run all checks -run-checks: security-test run-black unit-test check-coverage diff --git a/src/secrets_manager.py b/src/secrets_manager.py deleted file mode 100644 index 3484688..0000000 --- a/src/secrets_manager.py +++ /dev/null @@ -1,49 +0,0 @@ -import boto3 -from botocore.exceptions import ClientError -import json - - -def sm_client(): - sm_client = boto3.client("secretsmanager") - yield sm_client - - -def create_secret( - sm_client, secret_name, cohort_id, user, password, host, database, port -): - secret = { - "cohort_id": cohort_id, - "user": user, - "password": password, - "host": host, - "database": database, - "port": port, - } - - response = sm_client.create_secret( - Name=secret_name, SecretString=json.dumps(secret) - ) - - print(response) - return response - - -def list_secret(sm_client): - response = sm_client.list_secrets() - secret_dict = response["SecretList"] - secret_names = [] - for items in secret_dict: - secret_names.append(items["Name"]) - print(f"{len(secret_names)} secret(s) available") - for name in secret_names: - print(name) - return secret_names - - -def retrieve_secrets(sm_client): - response = sm_client.get_secrets() - - -# retrieve secret -# so lambda can access totesy db -# so lambda connect to the db and then retrieve the data diff --git a/test.py b/test.py deleted file mode 100644 index e69de29..0000000 -- cgit v1.2.3