From bb1665fd08d8abf10930875272bdc2b7f8a4c681 Mon Sep 17 00:00:00 2001
From: Ang Bel
Date: Tue, 13 Aug 2024 16:25:06 +0100
Subject: rds.tf file with some placeholders. Additional set up is required

---
 terraform/lambda.tf | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++
 terraform/rds.tf    | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 terraform/vars.tf   | 19 +++++++++++++
 3 files changed, 171 insertions(+)
 create mode 100644 terraform/lambda.tf
 create mode 100644 terraform/rds.tf

diff --git a/terraform/lambda.tf b/terraform/lambda.tf
new file mode 100644
index 0000000..09d6697
--- /dev/null
+++ b/terraform/lambda.tf
@@ -0,0 +1,74 @@
+### EXTRACT LAMBDA SET UP
+data "archive_file" "extract_lambda_zip" {
+  type        = "zip"
+  source_file = "${path.module}/../src/extract_lambda.py"
+  output_path = "${path.module}/../extract_function.zip"
+}
+
+resource "aws_lambda_function" "extract_lambda" {
+  function_name = var.extract_lambda_name
+  s3_bucket     = aws_s3_bucket.lambda_bucket.bucket
+  s3_key        = "extract_lambda/extract_function.zip"
+  role          = aws_iam_role.PLACEHOLDER_extract_lambda_role.arn # << lambda role placeholder
+  handler       = "extract_lambda.lambda_handler" # << check that the function is called lambda handler
+  runtime       = "python3.11"
+  environment {
+    variables = {
+      output = aws_s3_bucket.extract_bucket.bucket
+    }
+  }
+}
+
+resource "aws_lambda_permission" "allow_to_write_to_s3_extract_bucket" {
+  action        = "lambda:InvokeFunction"
+  function_name = aws_lambda_function.extract_lambda.function_name
+  principal     = "s3.amazonaws.com" # lets S3 invoke the function; it does not grant the function write access to the bucket
+  source_arn    = aws_s3_bucket.extract_bucket.arn
+}
+
+
+### TRANSFORM LAMBDA SET UP
+data "archive_file" "transform_lambda_zip" {
+  type        = "zip"
+  source_file = "${path.module}/../src/transform_lambda.py"
+  output_path = "${path.module}/../transform_function.zip"
+}
+
+resource "aws_lambda_function" "transform_lambda" {
+  function_name = var.transform_lambda_name
+  s3_bucket     = aws_s3_bucket.lambda_bucket.bucket
+  s3_key        = "transform_lambda/transform_function.zip"
+  role          = aws_iam_role.PLACEHOLDER_transform_lambda_role.arn # << lambda role placeholder
+  handler       = "transform_lambda.lambda_handler" # << check that the function is called lambda handler
+  runtime       = "python3.11"
+  environment {
+    variables = {
+      output = aws_s3_bucket.transform_bucket.bucket
+    }
+  }
+}
+
+resource "aws_lambda_permission" "allow_to_write_to_s3_transform_bucket" {
+  action        = "lambda:InvokeFunction"
+  function_name = aws_lambda_function.transform_lambda.function_name
+  principal     = "s3.amazonaws.com" # lets S3 invoke the function; it does not grant the function write access to the bucket
+  source_arn    = aws_s3_bucket.transform_bucket.arn
+}
+
+
+### LOAD LAMBDA SET UP
+data "archive_file" "load_lambda_zip" {
+  type        = "zip"
+  source_file = "${path.module}/../src/load_lambda.py"
+  output_path = "${path.module}/../load_function.zip"
+}
+
+resource "aws_lambda_function" "load_lambda" {
+  function_name = var.load_lambda_name
+  s3_bucket     = aws_s3_bucket.lambda_bucket.bucket
+  s3_key        = "load_lambda/load_function.zip"
+  role          = aws_iam_role.PLACEHOLDER_load_lambda_role.arn # << lambda role placeholder
+  handler       = "load_lambda.lambda_handler" # << check that the function is called lambda handler
+  runtime       = "python3.11"
+}
+
diff --git a/terraform/rds.tf b/terraform/rds.tf
new file mode 100644
index 0000000..4b25c5f
--- /dev/null
+++ b/terraform/rds.tf
@@ -0,0 +1,78 @@
+data "aws_availability_zones" "available" {}
+
+module "vpc" {
+  source  = "terraform-aws-modules/vpc/aws"
+  version = "2.77.0"
+
+  name                 = var.project_name
+  cidr                 = "10.0.0.0/16"
+  azs                  = data.aws_availability_zones.available.names
+  public_subnets       = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"]
+  enable_dns_hostnames = true
+  enable_dns_support   = true
+}
+
+resource "aws_db_subnet_group" "Terrific-Totes-sub-gr" {
+  name       = "tt-db-subnet" # DB subnet group names must be lowercase
+  subnet_ids = module.vpc.public_subnets
+
+  tags = {
+    Name = var.project_name
+  }
+}
+
+resource "aws_security_group" "rds" {
+  name   = "${var.project_name}-rds"
+  vpc_id = module.vpc.vpc_id
+
+  ingress {
+    from_port   = 5432
+    to_port     = 5432
+    protocol    = "tcp"
+    cidr_blocks = [module.vpc.vpc_cidr_block] # the instance is not publicly accessible, so only admit traffic from inside the VPC
+  }
+
+  egress {
+    from_port   = 5432
+    to_port     = 5432
+    protocol    = "tcp"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+
+  tags = {
+    Name = "${var.project_name}-rds"
+  }
+}
+
+resource "aws_db_parameter_group" "Terrific-Totes-param-gr" {
+  name   = "tt-db-param" # DB parameter group names must be lowercase
+  family = "postgres14"
+
+  parameter {
+    name  = "log_connections"
+    value = "1"
+  }
+}
+
+resource "aws_db_instance" "Terrific-Totes-rds" {
+  db_name           = replace(lower(var.project_name), "-", "_") # PostgreSQL database names allow only letters, digits and underscores
+  instance_class    = "db.t3.micro"
+  allocated_storage = 20 # minimum size for PostgreSQL on general purpose SSD storage
+  engine            = "postgres"
+  engine_version    = "14" # major version only, so RDS selects a currently supported minor version
+  username          = var.db_username # supplied via tfvars / TF_VAR_db_username, never committed
+  password          = var.db_password # supplied via tfvars / TF_VAR_db_password, never committed
+  ### alternatively to providing username and password we can specify:
+  # resource "aws_kms_key" "example_key" {
+  #   description = "Example KMS Key"
+  # }
+  # within the resource:
+  #   manage_master_user_password   = true
+  #   master_user_secret_kms_key_id = aws_kms_key.example_key.key_id
+  # }
+  db_subnet_group_name   = aws_db_subnet_group.Terrific-Totes-sub-gr.name
+  vpc_security_group_ids = [aws_security_group.rds.id]
+  parameter_group_name   = aws_db_parameter_group.Terrific-Totes-param-gr.name
+  publicly_accessible    = false
+  skip_final_snapshot    = true
+}
\ No newline at end of file
diff --git a/terraform/vars.tf b/terraform/vars.tf
index fa84222..350c2c6 100644
--- a/terraform/vars.tf
+++ b/terraform/vars.tf
@@ -23,6 +23,25 @@ variable "transform_lambda_name" {
   default = "transform-lambda"
 }
 
+variable "load_lambda_name" {
+  type    = string
+  default = "load-lambda"
+}
+
+variable "project_name" {
+  type    = string
+  default = "Terrific-Totes"
+}
+
+variable "db_username" {
+  type = string # master user name for the RDS instance
+}
+
+variable "db_password" {
+  type      = string # master user password for the RDS instance
+  sensitive = true
+}
+
 data "aws_caller_identity" "current" {}
 
 data "aws_region" "current" {}
\ No newline at end of file
-- 
cgit v1.2.3

From 6c7914a9d33fbaa962cf1f083c2ee79ace62f401 Mon Sep 17 00:00:00 2001
From: Ang Bel
Date: Tue, 13 Aug 2024 16:33:56 +0100
Subject: restore load_lambda script

---
src/load_lambda.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/src/load_lambda.py b/src/load_lambda.py index e69de29..5c6718c 100644 --- a/src/load_lambda.py +++ b/src/load_lambda.py @@ -0,0 +1,52 @@ +### Example taken from https://medium.com/@pranay1001090/how-to-load-data-from-amazon-s3-csv-parquet-to-aws-rds-using-python-3dc51dd2186e + +### THIS IS AN EXAMPLE CODE WE CAN PICK FROM, NONE OF THIS HAS BEEN CUSTOMISED YET + +import boto3 +import pandas as pd +import pyarrow.parquet as pq +from io import BytesIO +from sqlalchemy import create_engine + +# AWS credentials and region +aws_access_key = '' +aws_secret_key = '' +region_name = '' + +# S3 bucket and file details +bucket_name = '' +file_prefix = '' +s3_client = boto3.client('s3', aws_access_key_id=aws_access_key, aws_secret_access_key=aws_secret_key, region_name=region_name) + +# RDS connection details +database_name = '' +table_name = '' +rds_host = '' +rds_port = '' +rds_user = '' +rds_password = '' +# Function to load Parquet files into a Pandas DataFrame +def load_parquet_data(s3_bucket, s3_prefix): + file_objects = s3_client.list_objects_v2(Bucket=s3_bucket, Prefix=s3_prefix)['Contents'] + dfs = [] + for file_object in file_objects: + file_key = file_object['Key'] + file_obj = s3_client.get_object(Bucket=s3_bucket, Key=file_key) + parquet_file = pq.ParquetFile(BytesIO(file_obj['Body'].read())) + df = parquet_file.read().to_pandas() + dfs.append(df) + return pd.concat(dfs) + +# Load Parquet data from S3 into a Pandas DataFrame +df = load_parquet_data(bucket_name, file_prefix) +# Connect to RDS +conn_str = f'mysql+pymysql://{rds_user}:{rds_password}@{rds_host}:{rds_port}/{database_name}' +engine = create_engine(conn_str) + +# Write the DataFrame to RDS +df.to_sql(table_name, con=engine, if_exists='replace', index=False) + +# Closing the connection +engine.dispose() + +print('Data loaded successfully!') \ No newline at end of file -- cgit v1.2.3