aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorlian-manonog <160282780+lian-manonog@users.noreply.github.com>2024-08-15 09:54:23 +0100
committerGitHub <noreply@github.com>2024-08-15 09:54:23 +0100
commita1cbded6b145b6cba75180b218d7445a51c39f4f (patch)
tree5defda49bf82bb3527832277fbcb0eb1e885daea
parentb34f54057c6efc24e2973bac0bdda1510994783c (diff)
parent431d943f7464bcaf3790b834e70aa3a0fa284e90 (diff)
downloadde-project-bentley-a1cbded6b145b6cba75180b218d7445a51c39f4f.tar.gz
de-project-bentley-a1cbded6b145b6cba75180b218d7445a51c39f4f.zip
Merge pull request #23 from ajschofield/feature/terraform-additional-functionality
Feature/terraform additional functionality
-rw-r--r--src/load_lambda.py52
-rw-r--r--terraform/rds.tf78
-rw-r--r--terraform/vars.tf5
3 files changed, 135 insertions, 0 deletions
diff --git a/src/load_lambda.py b/src/load_lambda.py
index e69de29..5c6718c 100644
--- a/src/load_lambda.py
+++ b/src/load_lambda.py
@@ -0,0 +1,52 @@
+### Example taken from https://medium.com/@pranay1001090/how-to-load-data-from-amazon-s3-csv-parquet-to-aws-rds-using-python-3dc51dd2186e
+
+### THIS IS AN EXAMPLE CODE WE CAN PICK FROM, NONE OF THIS HAS BEEN CUSTOMISED YET
+
+import boto3
+import pandas as pd
+import pyarrow.parquet as pq
+from io import BytesIO
+from sqlalchemy import create_engine
+
+# AWS credentials and region
+aws_access_key = '<your-access-key>'
+aws_secret_key = '<your-secret-key>'
+region_name = '<your-region>'
+
+# S3 bucket and file details
+bucket_name = '<your-bucket-name>'
+file_prefix = '<your-file-prefix>'
+s3_client = boto3.client('s3', aws_access_key_id=aws_access_key, aws_secret_access_key=aws_secret_key, region_name=region_name)
+
+# RDS connection details
+database_name = '<your-database-name>'
+table_name = '<your-table-name>'
+rds_host = '<your-rds-host>'
+rds_port = '<your-rds-port>'
+rds_user = '<your-rds-username>'
+rds_password = '<your-rds-password>'
+# Function to load Parquet files into a Pandas DataFrame
+def load_parquet_data(s3_bucket, s3_prefix):
+ file_objects = s3_client.list_objects_v2(Bucket=s3_bucket, Prefix=s3_prefix)['Contents']
+ dfs = []
+ for file_object in file_objects:
+ file_key = file_object['Key']
+ file_obj = s3_client.get_object(Bucket=s3_bucket, Key=file_key)
+ parquet_file = pq.ParquetFile(BytesIO(file_obj['Body'].read()))
+ df = parquet_file.read().to_pandas()
+ dfs.append(df)
+ return pd.concat(dfs)
+
+# Load Parquet data from S3 into a Pandas DataFrame
+df = load_parquet_data(bucket_name, file_prefix)
+# Connect to RDS
+conn_str = f'mysql+pymysql://{rds_user}:{rds_password}@{rds_host}:{rds_port}/{database_name}'
+engine = create_engine(conn_str)
+
+# Write the DataFrame to RDS
+df.to_sql(table_name, con=engine, if_exists='replace', index=False)
+
+# Closing the connection
+engine.dispose()
+
+print('Data loaded successfully!') \ No newline at end of file
diff --git a/terraform/rds.tf b/terraform/rds.tf
new file mode 100644
index 0000000..4b25c5f
--- /dev/null
+++ b/terraform/rds.tf
@@ -0,0 +1,78 @@
+data "aws_availability_zones" "available" {}
+
+module "vpc" {
+ source = "terraform-aws-modules/vpc/aws"
+ version = "2.77.0"
+
+ name = "${var.project_name}"
+ cidr = "10.0.0.0/16"
+ azs = data.aws_availability_zones.available.names
+ public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"]
+ enable_dns_hostnames = true
+ enable_dns_support = true
+}
+
+resource "aws_db_subnet_group" "Terrific-Totes-sub-gr" {
+ name = "TT-db-subnet"
+ subnet_ids = module.vpc.public_subnets
+
+ tags = {
+ Name = "${var.project_name}"
+ }
+}
+
+resource "aws_security_group" "rds" {
+ name = "${var.project_name}-rds"
+ vpc_id = module.vpc.vpc_id
+
+ ingress {
+ from_port = 5432
+ to_port = 5432
+ protocol = "tcp"
+ cidr_blocks = ["0.0.0.0/0"]
+ }
+
+ egress {
+ from_port = 5432
+ to_port = 5432
+ protocol = "tcp"
+ cidr_blocks = ["0.0.0.0/0"]
+ }
+
+ tags = {
+ Name = "${var.project_name}-rds"
+ }
+}
+
+resource "aws_db_parameter_group" "Terrific-Totes-param-gr" {
+ name = "TT-db-param"
+ family = "postgres14"
+
+ parameter {
+ name = "log_connections"
+ value = "1"
+ }
+}
+
+resource "aws_db_instance" "Terrific-Totes-rds" {
+ db_name = "${var.project_name}"
+ instance_class = "db.t3.micro"
+ allocated_storage = 5
+ engine = "postgres"
+ engine_version = "14.1"
+ username = "user credentials for the root user" # we could use .env here
+ password = "user password for the root user" # we could use .env here
+ ### alternatively to providing username nad password we can specify:
+# resource "aws_kms_key" "example_key" {
+# description = "Example KMS Key"
+# }
+# within the resource:
+# manage_master_user_password = true
+# master_user_secret_kms_key_id = aws_kms_key.example.key_id
+# }
+ db_subnet_group_name = aws_db_subnet_group.Terrific-Totes-sub-gr.name
+ vpc_security_group_ids = [aws_security_group.rds.id]
+ parameter_group_name = aws_db_parameter_group.Terrific-Totes-param-gr.name
+ publicly_accessible = false
+ skip_final_snapshot = true
+} \ No newline at end of file
diff --git a/terraform/vars.tf b/terraform/vars.tf
index cc9348a..350c2c6 100644
--- a/terraform/vars.tf
+++ b/terraform/vars.tf
@@ -28,6 +28,11 @@ variable "load_lambda_name" {
default = "load-lambda"
}
+variable "project_name" {
+ type = string
+ default = "Terrific-Totes"
+}
+
data "aws_caller_identity" "current" {}
data "aws_region" "current" {} \ No newline at end of file
git.ajschof.me — hosted by ajschofield — powered by cgit