diff options
| -rw-r--r-- | .gitignore | 10 | ||||
| -rw-r--r-- | events - TBD?.tf | 52 | ||||
| -rw-r--r-- | src/load_lambda.py | 54 | ||||
| -rw-r--r-- | src/transform_lambda.py | 2 | ||||
| -rw-r--r-- | terraform/events.tf | 68 | ||||
| -rw-r--r-- | terraform/iam.tf | 116 | ||||
| -rw-r--r-- | terraform/lambda.tf | 101 | ||||
| -rw-r--r-- | terraform/main.tf | 28 | ||||
| -rw-r--r-- | terraform/s3.tf | 38 | ||||
| -rw-r--r-- | terraform/vars.tf | 26 |
10 files changed, 219 insertions, 276 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d1df545 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +*.tfstate +*.tfstate.* +*.tfvars +*.tfvars.json +.terraform.tfstate.lock.info +*.zip +.terraform/ +.terraform* +log* +.DS_Store diff --git a/events - TBD?.tf b/events - TBD?.tf deleted file mode 100644 index 25fb35b..0000000 --- a/events - TBD?.tf +++ /dev/null @@ -1,52 +0,0 @@ -resource "aws_cloudwatch_event_rule" "lambda_trigger" { - name = "lambda-scheduled-trigger" - description = "Schedule to trigger the Lambda function" - schedule_expression = "rate(30 minutes)" - -# event_pattern = jsonencode({ -# detail-type = [ -# "AWS Console Sign In via CloudTrail" -# ] -# }) -} - - -resource "aws_cloudwatch_event_target" "lambda" { - rule = aws_cloudwatch_event_rule.lambda_trigger.name - target_id = "TargetFunctionV1" - arn = aws_lambda_function.my_lambda_function.arn -} - - - -resource "aws_lambda_permission" "allow_eventbridge" { - statement_id = "AllowExecutionFromEventBridge" - action = "lambda:InvokeFunction" - function_name = aws_lambda_function.my_lambda_function.function_name - principal = "events.amazonaws.com" - source_arn = aws_cloudwatch_event_rule.lambda_trigger.arn -} - - -# below is step function 1 -resource "aws_lambda_permission" "allow_s3_ingestion" { - statement_id = "AllowS3InvokeLambdaTransform" - action = "lambda:InvokeFunction" - function_name = aws_lambda_function.lambda_transform.function_name - principal = "s3.amazonaws.com" - source_arn = aws_s3_bucket.extract.arn -} - - -resource "aws_s3_bucket_notification" "extract_bucket_notification" { - bucket = aws_s3_bucket.extract.id - - lambda_function { - events = ["s3:ObjectCreated:*"] - lambda_function_arn = aws_lambda_function.lambda_transform.arn - } - - depends_on = [aws_lambda_permission.allow_s3_ingestion] -} - -# need to duplicate and replace "2" with "3"
\ No newline at end of file diff --git a/src/load_lambda.py b/src/load_lambda.py index 5c6718c..6ee681f 100644 --- a/src/load_lambda.py +++ b/src/load_lambda.py @@ -1,52 +1,2 @@ -### Example taken from https://medium.com/@pranay1001090/how-to-load-data-from-amazon-s3-csv-parquet-to-aws-rds-using-python-3dc51dd2186e - -### THIS IS AN EXAMPLE CODE WE CAN PICK FROM, NONE OF THIS HAS BEEN CUSTOMISED YET - -import boto3 -import pandas as pd -import pyarrow.parquet as pq -from io import BytesIO -from sqlalchemy import create_engine - -# AWS credentials and region -aws_access_key = '<your-access-key>' -aws_secret_key = '<your-secret-key>' -region_name = '<your-region>' - -# S3 bucket and file details -bucket_name = '<your-bucket-name>' -file_prefix = '<your-file-prefix>' -s3_client = boto3.client('s3', aws_access_key_id=aws_access_key, aws_secret_access_key=aws_secret_key, region_name=region_name) - -# RDS connection details -database_name = '<your-database-name>' -table_name = '<your-table-name>' -rds_host = '<your-rds-host>' -rds_port = '<your-rds-port>' -rds_user = '<your-rds-username>' -rds_password = '<your-rds-password>' -# Function to load Parquet files into a Pandas DataFrame -def load_parquet_data(s3_bucket, s3_prefix): - file_objects = s3_client.list_objects_v2(Bucket=s3_bucket, Prefix=s3_prefix)['Contents'] - dfs = [] - for file_object in file_objects: - file_key = file_object['Key'] - file_obj = s3_client.get_object(Bucket=s3_bucket, Key=file_key) - parquet_file = pq.ParquetFile(BytesIO(file_obj['Body'].read())) - df = parquet_file.read().to_pandas() - dfs.append(df) - return pd.concat(dfs) - -# Load Parquet data from S3 into a Pandas DataFrame -df = load_parquet_data(bucket_name, file_prefix) -# Connect to RDS -conn_str = f'mysql+pymysql://{rds_user}:{rds_password}@{rds_host}:{rds_port}/{database_name}' -engine = create_engine(conn_str) - -# Write the DataFrame to RDS -df.to_sql(table_name, con=engine, if_exists='replace', index=False) - -# Closing the connection -engine.dispose() - -print('Data loaded successfully!')
\ No newline at end of file +def lambda_handler(): + pass
\ No newline at end of file diff --git a/src/transform_lambda.py b/src/transform_lambda.py index e69de29..6ee681f 100644 --- a/src/transform_lambda.py +++ b/src/transform_lambda.py @@ -0,0 +1,2 @@ +def lambda_handler(): + pass
\ No newline at end of file diff --git a/terraform/events.tf b/terraform/events.tf index 0196dc3..263141f 100644 --- a/terraform/events.tf +++ b/terraform/events.tf @@ -1,39 +1,57 @@ +resource "random_string" "eventbridge_suffix" { + length = 8 + special = false + upper = false +} + +resource "random_string" "s3_ingestion_suffix" { + length = 8 + special = false + upper = false +} + +resource "random_string" "s3_transform_suffix" { + length = 8 + special = false + upper = false +} + resource "aws_cloudwatch_event_rule" "lambda_trigger" { name = "lambda-scheduled-trigger" description = "Schedule to trigger the Lambda function" schedule_expression = "rate(30 minutes)" - -# event_pattern = jsonencode({ -# detail-type = [ -# "AWS Console Sign In via CloudTrail" -# ] -# }) } - resource "aws_cloudwatch_event_target" "extract_lambda_cw_event" { - rule = aws_cloudwatch_event_rule.lambda_trigger.name - target_id = "TargetFunctionV1" - arn = aws_lambda_function.extract_lambda.arn #replaced lambda name placeholder + rule = aws_cloudwatch_event_rule.lambda_trigger.name + target_id = "TargetFunctionV1" + arn = aws_lambda_function.extract_lambda.arn #replaced lambda name placeholder + depends_on = [aws_lambda_permission.allow_eventbridge] } - resource "aws_lambda_permission" "allow_eventbridge" { - statement_id = "AllowExecutionFromEventBridge" + statement_id = "AllowExecutionFromEventBridge${random_string.eventbridge_suffix.result}" action = "lambda:InvokeFunction" - function_name = aws_lambda_function.extract_lambda.function_name #replaced lambda name placeholder + function_name = aws_lambda_function.extract_lambda.function_name principal = "events.amazonaws.com" - source_arn = aws_cloudwatch_event_rule.lambda_trigger.arn -} + source_arn = aws_cloudwatch_event_rule.lambda_trigger.arn + lifecycle { + replace_triggered_by = [random_string.eventbridge_suffix] + } +} # below is step function 1 resource "aws_lambda_permission" "allow_s3_ingestion" { - statement_id = "AllowS3InvokeLambdaTransform" + statement_id = "AllowS3InvokeLambdaTransform${random_string.s3_ingestion_suffix.result}" action = "lambda:InvokeFunction" function_name = aws_lambda_function.transform_lambda.function_name #replaced lambda name placeholder principal = "s3.amazonaws.com" source_arn = aws_s3_bucket.extract_bucket.arn #replaced bucket name placeholder + + lifecycle { + replace_triggered_by = [random_string.s3_ingestion_suffix] + } } @@ -41,21 +59,23 @@ resource "aws_s3_bucket_notification" "extract_bucket_notification" { bucket = aws_s3_bucket.extract_bucket.id #replaced bucket name placeholder lambda_function { - events = ["s3:ObjectCreated:*"] + events = ["s3:ObjectCreated:*"] lambda_function_arn = aws_lambda_function.transform_lambda.arn #replaced lambda name placeholder } depends_on = [aws_lambda_permission.allow_s3_ingestion] } -###### - -resource "aws_lambda_permission" "allow_s3_transfrom_bucket" { - statement_id = "AllowS3InvokeLambdaTransform" +resource "aws_lambda_permission" "allow_s3_transform_bucket" { + statement_id = "AllowS3InvokeLambdaTransform${random_string.s3_transform_suffix.result}" action = "lambda:InvokeFunction" function_name = aws_lambda_function.transform_lambda.function_name #replaced lambda name placeholder principal = "s3.amazonaws.com" source_arn = aws_s3_bucket.transform_bucket.arn #replaced bucket name placeholder + + lifecycle { + replace_triggered_by = [random_string.s3_transform_suffix] + } } @@ -63,9 +83,9 @@ resource "aws_s3_bucket_notification" "transform_bucket_notification" { bucket = aws_s3_bucket.transform_bucket.id #replaced bucket name placeholder lambda_function { - events = ["s3:ObjectCreated:*"] + events = ["s3:ObjectCreated:*"] lambda_function_arn = aws_lambda_function.transform_lambda.arn #replaced lambda name placeholder } - depends_on = [aws_lambda_permission.allow_s3_transform] -}
\ No newline at end of file + depends_on = [aws_lambda_permission.allow_s3_transform_bucket] +} diff --git a/terraform/iam.tf b/terraform/iam.tf index bb8d932..0e5fa6d 100644 --- a/terraform/iam.tf +++ b/terraform/iam.tf @@ -4,7 +4,7 @@ ######################################################################## # DEFINE MULTI-SERVICE ROLE (lambda, s3, cloudwatch, events) -resource "aws_iam_role" "bentley_multi_service_role" { +resource "aws_iam_role" "multi_service_role" { name = "multi_service_role" assume_role_policy = jsonencode({ @@ -16,9 +16,7 @@ resource "aws_iam_role" "bentley_multi_service_role" { Principal = { Service = [ "lambda.amazonaws.com", - "states.amazonaws.com", - "events.amazonaws.com", - "s3.amazonaws.com" + "scheduler.amazonaws.com" ] } } @@ -27,7 +25,6 @@ resource "aws_iam_role" "bentley_multi_service_role" { } - ######################################################################## # S3 SETUP # Description: allows allows retention/tagging/access control settings @@ -35,54 +32,45 @@ resource "aws_iam_role" "bentley_multi_service_role" { ######################################################################## # S3 DEFINE POLICY -resource "aws_iam_policy" "s3_access_policy" { - name = "s3_access_policy" - path = "/" - description = "IAM policy for S3 access" - - policy = jsonencode({ - Version = "2012-10-17" - Statement = [ - { - Effect = "Allow" - Action = [ - "s3:PutObject", - "s3:GetObject", - "s3:ListBucket" - ] - resources = [ - "${aws_s3_bucket.extract_bucket.arn}/*", - "${aws_s3_bucket.transform_bucket.arn}/*", - "${aws_s3_bucket.lambda_bucket.arn}/*" - ] - } - ] - } - ) +data "aws_iam_policy_document" "s3_data_policy_doc" { + statement { + actions = [ + "s3:PutObject", + "s3:PutObjectRetention", + "s3:PutObjectTagging", + "s3:PutObjectAcl" + ] + resources = [ + "${aws_s3_bucket.extract_bucket.arn}/*", + "${aws_s3_bucket.transform_bucket.arn}/*", + "${aws_s3_bucket.lambda_code_bucket.arn}/*", + ] + } } + ######################################################################## # LAMBDA SETUP # Description: Allows Lambda permission to write to Cloudwatch logs ######################################################################## resource "aws_iam_policy" "lambda_execution_policy" { - name = "lambda_execution_policy" - path = "/" + name = "lambda_execution_policy" + path = "/" description = "IAM policy for Lambda execution" policy = jsonencode({ Version = "2012-10-17" Statement = [ - { + { Effect = "Allow" Action = [ "lambda:InvokeFunction", "lambda:GetFunction" ] Resource = "*" - } - ] + } + ] } ) } @@ -97,7 +85,7 @@ data "aws_iam_policy_document" "cw_document" { actions = ["logs:CreateLogGroup"] resources = [ "arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:*" - ] + ] } statement { @@ -105,13 +93,18 @@ data "aws_iam_policy_document" "cw_document" { "logs:CreateLogStream", "logs:CreateLogGroup", "logs:PutLogEvents" - ] - resources = [ - "arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:/aws/lambda/*" - ] + ] + resources = [ + "arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:log-group:/aws/lambda/*" + ] } } +resource "aws_iam_policy" "cw_policy" { + name = "cw_policy" + policy = data.aws_iam_policy_document.cw_document.json +} + ######################################################################## # POLICY WRITE & ATTACH ######################################################################## @@ -121,8 +114,45 @@ resource "aws_iam_policy" "s3_write_policy" { policy = data.aws_iam_policy_document.s3_data_policy_doc.json } -# S3 ATTACH POLICY -resource "aws_iam_role_policy_attachment" "lambda_s3_policy_attachment" { - role = aws_iam_role.lambda_role.name +resource "aws_iam_role_policy_attachment" "s3_attachment" { + role = aws_iam_role.multi_service_role.name policy_arn = aws_iam_policy.s3_write_policy.arn -}
\ No newline at end of file +} + +resource "aws_iam_role_policy_attachment" "lambda_attachment" { + role = aws_iam_role.multi_service_role.name + policy_arn = aws_iam_policy.lambda_execution_policy.arn +} + +resource "aws_iam_role_policy_attachment" "cw_attachment" { + role = aws_iam_role.multi_service_role.name + policy_arn = aws_iam_policy.cw_policy.arn +} + +################### +# EVENTS POLICIES # +################### + +data "aws_iam_policy_document" "cloudwatch_events_policy" { + statement { + actions = [ + "events:PutRule", + "events:PutTargets", + "events:RemoveTargets", + "events:DeleteRule", + "events:PutEvents" + ] + resources = ["*"] + effect = "Allow" + } +} + +resource "aws_iam_policy" "cloudwatch_events_policy" { + name = "cloudwatch_events_policy" + policy = data.aws_iam_policy_document.cloudwatch_events_policy.json +} + +resource "aws_iam_role_policy_attachment" "cloudwatch_events_attachment" { + role = aws_iam_role.multi_service_role.name + policy_arn = aws_iam_policy.cloudwatch_events_policy.arn +} diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 09d6697..72d1306 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -1,74 +1,83 @@ -### EXTRACT LAMBDA SET UP +# Extract Lambda Function data "archive_file" "extract_lambda_zip" { type = "zip" source_file = "${path.module}/../src/extract_lambda.py" output_path = "${path.module}/../extract_function.zip" } +resource "aws_s3_object" "extract_lambda_code" { + bucket = aws_s3_bucket.lambda_code_bucket.bucket + key = "${var.extract_lambda_name}/extract_function.zip" + source = data.archive_file.extract_lambda_zip.output_path + etag = filemd5(data.archive_file.extract_lambda_zip.output_path) +} resource "aws_lambda_function" "extract_lambda" { - function_name = "${var.extract_lambda_name}" - s3_bucket = aws_s3_bucket.lambda_bucket.bucket - s3_key = "extract_lambda/extract_function.zip" - role = aws_iam_role.PLACEHOLDER_extract_lambda_role.arn # << lambda role placehodler - handler = "extract_lambda.lambda_handler" # << check that the function is called lambda handler - runtime = "python3.11" - environment { - variables = { - output = aws_s3_bucket.extract_bucket.bucket - } - } -} + function_name = var.extract_lambda_name + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.extract_lambda_code.key + role = aws_iam_role.multi_service_role.arn + handler = "extract_lambda.extract" + runtime = "python3.11" -resource "aws_lambda_permission" "allow_to_write_to_s3_extract_bucket" { - action = "lambda:InvokeFunction" - function_name = aws_lambda_function.extract_lambda.function_name - principal = "s3.amazonaws.com" - source_arn = aws_s3_bucket.extract_bucket.arn -} + lifecycle { + create_before_destroy = true + } + depends_on = [aws_s3_object.extract_lambda_code] +} -### TRANSFORM LAMBDA SET UP +# Transform Lambda Function data "archive_file" "transform_lambda_zip" { type = "zip" source_file = "${path.module}/../src/transform_lambda.py" output_path = "${path.module}/../transform_function.zip" } +resource "aws_s3_object" "transform_lambda_code" { + bucket = aws_s3_bucket.lambda_code_bucket.bucket + key = "${var.transform_lambda_name}/transform_function.zip" + source = data.archive_file.transform_lambda_zip.output_path + etag = filemd5(data.archive_file.transform_lambda_zip.output_path) +} resource "aws_lambda_function" "transform_lambda" { - function_name = "${var.transform_lambda_name}" - s3_bucket = aws_s3_bucket.lambda_bucket.bucket - s3_key = "transform_lambda/transform_function.zip" - role = aws_iam_role.PLACEHOLDER_transform_lambda_role.arn # << lambda role placehodler - handler = "transform_lambda.lambda_handler" # << check that the function is called lambda handler - runtime = "python3.11" - environment { - variables = { - output = aws_s3_bucket.transform_bucket.bucket - } - } -} + function_name = var.transform_lambda_name + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.transform_lambda_code.key + role = aws_iam_role.multi_service_role.arn + handler = "transform_lambda.transform" + runtime = "python3.11" -resource "aws_lambda_permission" "allow_to_write_to_s3_transform_bucket" { - action = "lambda:InvokeFunction" - function_name = aws_lambda_function.transform_lambda.function_name - principal = "s3.amazonaws.com" - source_arn = aws_s3_bucket.transform_bucket.arn -} + lifecycle { + create_before_destroy = true + } + depends_on = [aws_s3_object.transform_lambda_code] +} -### LOAD LAMBDA SET UP +# Load Lambda Function data "archive_file" "load_lambda_zip" { type = "zip" source_file = "${path.module}/../src/load_lambda.py" output_path = "${path.module}/../load_function.zip" } +resource "aws_s3_object" "load_lambda_code" { + bucket = aws_s3_bucket.lambda_code_bucket.bucket + key = "${var.load_lambda_name}/load_function.zip" + source = data.archive_file.load_lambda_zip.output_path + etag = filemd5(data.archive_file.load_lambda_zip.output_path) +} resource "aws_lambda_function" "load_lambda" { - function_name = "${var.load_lambda_name}" - s3_bucket = aws_s3_bucket.lambda_bucket.bucket - s3_key = "load_lambda/load_function.zip" - role = aws_iam_role.PLACEHOLDER_load_lambda_role.arn # << lambda role placehodler - handler = "load_lambda.lambda_handler" # << check that the function is called lambda handler - runtime = "python3.11" -} + function_name = var.load_lambda_name + s3_bucket = aws_s3_bucket.lambda_code_bucket.bucket + s3_key = aws_s3_object.load_lambda_code.key + role = aws_iam_role.multi_service_role.arn + handler = "load_lambda.load" + runtime = "python3.11" + + lifecycle { + create_before_destroy = true + } + depends_on = [aws_s3_object.load_lambda_code] +} diff --git a/terraform/main.tf b/terraform/main.tf index 3ca9a3d..3b06701 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -1,26 +1,26 @@ terraform { required_providers { aws = { - source = "hashicorp/aws" - version = "~>5.0" + source = "hashicorp/aws" + version = "~>5.0" } } backend "s3" { - bucket = "bentley-secrets" - key = "bentley-project/terraform.tfstate" + bucket = "bentley-project-secrets" + key = "bentley-project/terraform.tfstate" region = "eu-west-2" } } provider "aws" { - region = "eu-west-2" - default_tags { - tags = { - ProjectName = "Terrific-Totes" - Team = "Team-Bentley" - Environment = "Dev" - GitHubRepo = "de-project-bentley" - ManagedBy = "Terraform" - } + region = "eu-west-2" + default_tags { + tags = { + ProjectName = "Terrific-Totes" + Team = "Team-Bentley" + Environment = "Dev" + GitHubRepo = "de-project-bentley" + ManagedBy = "Terraform" } -}
\ No newline at end of file + } +} diff --git a/terraform/s3.tf b/terraform/s3.tf index 8cb65ef..d5cdee3 100644 --- a/terraform/s3.tf +++ b/terraform/s3.tf @@ -1,40 +1,14 @@ ### EXTRACT BUCKET SET-UP resource "aws_s3_bucket" "extract_bucket" { - bucket = "${var.s3_extract_bucket_name}" + bucket_prefix = "${var.s3_extract_bucket_name}-" } -resource "aws_s3_object" "extract_lambda_code" { - bucket = aws_s3_bucket.s3_code_bucket_name.bucket - key = "${var.extract_lambda_name}/extract_function.zip" - source = "${path.module}/../extract_function.zip" -} # << can't figure out how this is being used but we seem to need it - -resource "aws_s3_bucket_notification" "extract_bucket_notification" { - bucket = aws_s3_bucket.extract_bucket.id - lambda_function { - lambda_function_arn = aws_lambda_function.extract_lambda.arn - events = ["s3:ObjectCreated:*"] - } - depends_on = [aws_lambda_permission.allow_to_write_to_s3_extract_bucket] -} # << is this the correct permission dependency? - - ### TRANSFORM BUCKET SET-UP resource "aws_s3_bucket" "transform_bucket" { - bucket = "${var.s3_transform_bucket_name}" + bucket_prefix = "${var.s3_transform_bucket_name}-" } -resource "aws_s3_object" "transform_lambda_code" { - bucket = aws_s3_bucket.s3_code_bucket_name.bucket - key = "${var.transform_lambda_name}/transform_function.zip" - source = "${path.module}/../transform_function.zip" -} # << can't figure out how this is being used but we seem to need it - -resource "aws_s3_bucket_notification" "transform_bucket_notification" { - bucket = aws_s3_bucket.transform_bucket.id - lambda_function { - lambda_function_arn = aws_lambda_function.transform_lambda.arn - events = ["s3:ObjectCreated:*"] - } - depends_on = [aws_lambda_permission.allow_to_write_to_s3_transform_bucket] -} # << is this the correct permission dependency? +### LAMBDA BUCKET +resource "aws_s3_bucket" "lambda_code_bucket" { + bucket_prefix = "${var.s3_code_bucket_name}-" +} diff --git a/terraform/vars.tf b/terraform/vars.tf index 350c2c6..d5cdafb 100644 --- a/terraform/vars.tf +++ b/terraform/vars.tf @@ -1,31 +1,31 @@ variable "s3_extract_bucket_name" { - type = string - default = "extract-bucket" + type = string + default = "extract-bucket" } variable "s3_transform_bucket_name" { - type = string - default = "transform-bucket" + type = string + default = "transform-bucket" } variable "s3_code_bucket_name" { - type = string - default = "lambda-bucket" + type = string + default = "lambda-bucket" } variable "extract_lambda_name" { - type = string - default = "extract-lambda" + type = string + default = "extract-lambda" } variable "transform_lambda_name" { - type = string - default = "transform-lambda" + type = string + default = "transform-lambda" } variable "load_lambda_name" { - type = string - default = "load-lambda" + type = string + default = "load-lambda" } variable "project_name" { @@ -35,4 +35,4 @@ variable "project_name" { data "aws_caller_identity" "current" {} -data "aws_region" "current" {}
\ No newline at end of file +data "aws_region" "current" {} |
