open-webui/iac/monitoring.tf
2025-12-08 11:27:59 +07:00

186 lines
No EOL
5.4 KiB
HCL

# Auto Scaling Target
# Registers the ECS service's desired task count as a scalable dimension,
# bounded to between 2 and 10 tasks.
resource "aws_appautoscaling_target" "ecs_target" {
  service_namespace  = "ecs"
  scalable_dimension = "ecs:service:DesiredCount"
  resource_id        = join("/", ["service", var.cluster_name, var.service_name])

  min_capacity = 2
  max_capacity = 10

  # resource_id is built from variables, so the ordering against the ECS
  # service has to be declared explicitly.
  depends_on = [
    aws_ecs_service.webui_scaled,
  ]
}
# Auto Scaling Policy
# Target-tracking policy on the service's average CPU utilization (target 70%).
# Although labelled "scale up", the configuration sets both a scale-out and a
# scale-in cooldown (300 seconds each).
resource "aws_appautoscaling_policy" "scale_up" {
  name        = "openwebui-scale-up"
  policy_type = "TargetTrackingScaling"

  # Identify the scalable target by reusing the values registered above.
  service_namespace  = aws_appautoscaling_target.ecs_target.service_namespace
  scalable_dimension = aws_appautoscaling_target.ecs_target.scalable_dimension
  resource_id        = aws_appautoscaling_target.ecs_target.resource_id

  target_tracking_scaling_policy_configuration {
    predefined_metric_specification {
      predefined_metric_type = "ECSServiceAverageCPUUtilization"
    }

    target_value       = 70.0
    scale_in_cooldown  = 300
    scale_out_cooldown = 300
  }
}
# CloudWatch Dashboard
# Three stacked metric widgets (12 wide, 6 high each):
#   1. ECS service CPU / memory utilization
#   2. ALB latency, request count and target HTTP status counts
#   3. Redis (ElastiCache) per-node CPU, memory and network metrics
resource "aws_cloudwatch_dashboard" "webui_dashboard" {
  dashboard_name = "OpenWebUI-HorizontalScaling"

  dashboard_body = jsonencode({
    widgets = [
      {
        type   = "metric"
        x      = 0
        y      = 0
        width  = 12
        height = 6
        properties = {
          metrics = [
            ["AWS/ECS", "CPUUtilization", "ServiceName", var.service_name, "ClusterName", var.cluster_name],
            [".", "MemoryUtilization", ".", ".", ".", "."]
          ]
          period = 300
          stat   = "Average"
          region = var.aws_region
          title  = "ECS Service Metrics"
        }
      },
      {
        type   = "metric"
        x      = 0
        y      = 6
        width  = 12
        height = 6
        properties = {
          metrics = [
            # Latency must be averaged; the widget-level "Sum" below only makes
            # sense for the count metrics. It is also moved to the right axis
            # because seconds and request counts differ by orders of magnitude.
            ["AWS/ApplicationELB", "TargetResponseTime", "LoadBalancer", aws_lb.webui_alb.arn_suffix, { stat = "Average", yAxis = "right" }],
            [".", "RequestCount", ".", "."],
            [".", "HTTPCode_Target_2XX_Count", ".", "."],
            [".", "HTTPCode_Target_4XX_Count", ".", "."],
            [".", "HTTPCode_Target_5XX_Count", ".", "."]
          ]
          period = 300
          stat   = "Sum"
          region = var.aws_region
          title  = "ALB Metrics"
        }
      },
      {
        type   = "metric"
        x      = 0
        y      = 12
        width  = 12
        height = 6
        properties = {
          # The CacheClusterId dimension takes the id of an individual node
          # (member cluster), not the replication group id, so one row is
          # generated per metric per member node.
          metrics = [
            for pair in setproduct(
              ["CPUUtilization", "DatabaseMemoryUsagePercentage", "NetworkBytesIn", "NetworkBytesOut"],
              sort(tolist(aws_elasticache_replication_group.redis.member_clusters))
            ) : ["AWS/ElastiCache", pair[0], "CacheClusterId", pair[1]]
          ]
          period = 300
          stat   = "Average"
          region = var.aws_region
          title  = "Redis Metrics"
        }
      }
    ]
  })
}
# CloudWatch Alarms
# Fires when the ECS service's average CPU utilization stays above 85% for
# two consecutive 5-minute periods; notifies the alerts SNS topic.
resource "aws_cloudwatch_metric_alarm" "high_cpu" {
  alarm_name        = "openwebui-high-cpu"
  alarm_description = "This metric monitors ECS CPU utilization"

  namespace   = "AWS/ECS"
  metric_name = "CPUUtilization"
  statistic   = "Average"
  dimensions = {
    ServiceName = var.service_name
    ClusterName = var.cluster_name
  }

  # Numeric arguments are written as numbers rather than quoted strings.
  comparison_operator = "GreaterThanThreshold"
  threshold           = 85
  period              = 300
  evaluation_periods  = 2

  alarm_actions = [aws_sns_topic.alerts.arn]
}
# Fires when the ECS service's average memory utilization stays above 90% for
# two consecutive 5-minute periods; notifies the alerts SNS topic.
resource "aws_cloudwatch_metric_alarm" "high_memory" {
  alarm_name        = "openwebui-high-memory"
  alarm_description = "This metric monitors ECS memory utilization"

  namespace   = "AWS/ECS"
  metric_name = "MemoryUtilization"
  statistic   = "Average"
  dimensions = {
    ServiceName = var.service_name
    ClusterName = var.cluster_name
  }

  # Numeric arguments are written as numbers rather than quoted strings.
  comparison_operator = "GreaterThanThreshold"
  threshold           = 90
  period              = 300
  evaluation_periods  = 2

  alarm_actions = [aws_sns_topic.alerts.arn]
}
# Fires when the ALB's average target response time stays above 2 (seconds)
# for two consecutive 5-minute periods; notifies the alerts SNS topic.
resource "aws_cloudwatch_metric_alarm" "alb_high_response_time" {
  alarm_name        = "openwebui-alb-high-response-time"
  alarm_description = "This metric monitors ALB response time"

  namespace   = "AWS/ApplicationELB"
  metric_name = "TargetResponseTime"
  statistic   = "Average"
  dimensions = {
    LoadBalancer = aws_lb.webui_alb.arn_suffix
  }

  # Numeric arguments are written as numbers rather than quoted strings.
  comparison_operator = "GreaterThanThreshold"
  threshold           = 2
  period              = 300
  evaluation_periods  = 2

  alarm_actions = [aws_sns_topic.alerts.arn]
}
# Fires when Redis average CPU utilization stays above 75% for two consecutive
# 5-minute periods; notifies the alerts SNS topic.
resource "aws_cloudwatch_metric_alarm" "redis_high_cpu" {
  alarm_name        = "openwebui-redis-high-cpu"
  alarm_description = "This metric monitors Redis CPU utilization"

  namespace   = "AWS/ElastiCache"
  metric_name = "CPUUtilization"
  statistic   = "Average"
  dimensions = {
    # CacheClusterId must be the id of a member node, not the replication
    # group id; with the group id the alarm never receives any datapoints.
    # The lexically first member is used.
    # NOTE(review): presumably that is the initial primary; replicas are not
    # covered by this alarm - confirm whether per-node alarms are wanted.
    CacheClusterId = sort(tolist(aws_elasticache_replication_group.redis.member_clusters))[0]
  }

  # Numeric arguments are written as numbers rather than quoted strings.
  comparison_operator = "GreaterThanThreshold"
  threshold           = 75
  period              = 300
  evaluation_periods  = 2

  alarm_actions = [aws_sns_topic.alerts.arn]
}
# SNS Topic for alerts
# Notification target referenced by the CloudWatch alarms in this file.
resource "aws_sns_topic" "alerts" {
  name = "openwebui-scaling-alerts"

  tags = { Name = "OpenWebUI Scaling Alerts" }
}
# Recipient of alarm e-mails. The default is the address that was previously
# hard-coded in the subscription, so existing deployments are unchanged;
# override it per environment instead of editing this file.
variable "alerts_email_endpoint" {
  description = "E-mail address subscribed to the OpenWebUI alerts SNS topic."
  type        = string
  default     = "loi.tra@gravityglobal.com"
}

# SNS Topic subscription
# Delivers messages published to the alerts topic by e-mail.
resource "aws_sns_topic_subscription" "email_alerts" {
  topic_arn = aws_sns_topic.alerts.arn
  protocol  = "email"
  endpoint  = var.alerts_email_endpoint
}