From e917f7eb672c532e890f18f1691b71075f2fd180 Mon Sep 17 00:00:00 2001 From: loitragg Date: Sat, 27 Sep 2025 16:03:43 +0700 Subject: [PATCH] feat(infra): add common module Grafana --- iac/modules/grafana-otel/README.md | 231 +++++++++++++++++ iac/modules/grafana-otel/main.tf | 347 ++++++++++++++++++++++++++ iac/modules/grafana-otel/outputs.tf | 114 +++++++++ iac/modules/grafana-otel/variables.tf | 156 ++++++++++++ iac/modules/grafana-otel/versions.tf | 10 + 5 files changed, 858 insertions(+) create mode 100644 iac/modules/grafana-otel/README.md create mode 100644 iac/modules/grafana-otel/main.tf create mode 100644 iac/modules/grafana-otel/outputs.tf create mode 100644 iac/modules/grafana-otel/variables.tf create mode 100644 iac/modules/grafana-otel/versions.tf diff --git a/iac/modules/grafana-otel/README.md b/iac/modules/grafana-otel/README.md new file mode 100644 index 0000000000..6b1029f24b --- /dev/null +++ b/iac/modules/grafana-otel/README.md @@ -0,0 +1,231 @@ +# Grafana OTEL Module + +This Terraform module deploys a standalone Grafana OTEL LGTM (Loki, Grafana, Tempo, Mimir) stack on AWS ECS Fargate for OpenTelemetry monitoring and observability. 
+ +## Features + +- **Complete OTEL Stack**: Grafana + Prometheus + Tempo + Loki in a single container +- **ECS Fargate Deployment**: Serverless, scalable container deployment +- **Service Discovery**: Automatic DNS registration for easy service connectivity +- **Security**: Configurable security groups and network access controls +- **Auto Scaling**: Optional ECS autoscaling based on CPU utilization +- **CloudWatch Integration**: Structured logging with configurable retention + +## Architecture + +``` +┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐ +│ Applications │───▶│ OTLP Endpoints │───▶│ Grafana UI │ +│ │ │ (4317/4318) │ │ (3000) │ +└─────────────────┘ └──────────────────┘ └─────────────────┘ + │ + ▼ + ┌──────────────────┐ + │ ECS Fargate │ + │ - Grafana │ + │ - Prometheus │ + │ - Tempo │ + │ - Loki │ + └──────────────────┘ +``` + +## Usage + +### Basic Usage + +```hcl +module "grafana_monitoring" { + source = "./modules/grafana-otel" + + # Core Infrastructure + vpc_id = "vpc-12345678" + private_subnet_ids = ["subnet-12345678", "subnet-87654321"] + cluster_name = "my-ecs-cluster" + + # Network Access + allowed_cidr_blocks = ["10.0.0.0/8", "192.168.0.0/16"] + + # Optional: OpenTelemetry Sources + otlp_sources_security_group_ids = ["sg-app1", "sg-app2"] + + tags = { + Environment = "production" + Project = "monitoring" + } +} +``` + +### Advanced Usage with Existing Service Discovery + +```hcl +module "grafana_monitoring" { + source = "./modules/grafana-otel" + + # Core Infrastructure + vpc_id = "vpc-12345678" + private_subnet_ids = ["subnet-12345678", "subnet-87654321"] + cluster_name = "my-ecs-cluster" + + # Use existing service discovery namespace + service_discovery_namespace_id = "ns-12345678" + service_name = "monitoring" + + # Custom configuration + environment = "staging" + cpu = 2048 + memory = 4096 + desired_count = 2 + enable_autoscaling = true + max_capacity = 3 + + # Custom Grafana credentials + grafana_admin_user = "monitoring-admin" 
+ grafana_admin_password = "secure-password-123" + + tags = { + Environment = "staging" + Project = "monitoring" + } +} +``` + +## Requirements + +| Name | Version | +|------|---------| +| terraform | >= 1.0 | +| aws | >= 5.0 | + +## Providers + +| Name | Version | +|------|---------| +| aws | >= 5.0 | + +## Resources Created + +- **ECS Service & Task Definition**: Fargate-based Grafana OTEL LGTM container +- **Service Discovery**: DNS service registration for easy connectivity +- **Security Groups**: Network access controls for Grafana UI and OTLP endpoints +- **IAM Roles**: Execution role with necessary permissions +- **CloudWatch Log Group**: Centralized logging with configurable retention +- **Auto Scaling** (optional): CPU-based scaling for high availability + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| vpc_id | VPC ID where Grafana will be deployed | `string` | n/a | yes | +| private_subnet_ids | Private subnet IDs for Grafana ECS tasks | `list(string)` | n/a | yes | +| cluster_name | ECS cluster name where Grafana will be deployed | `string` | n/a | yes | +| aws_region | AWS region for deployment | `string` | `"us-east-1"` | no | +| allowed_cidr_blocks | CIDR blocks allowed to access Grafana UI | `list(string)` | `["10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"]` | no | +| otlp_sources_security_group_ids | Security group IDs that should be allowed to send OTLP data | `list(string)` | `[]` | no | +| grafana_admin_user | Grafana admin username | `string` | `"admin"` | no | +| grafana_admin_password | Grafana admin password | `string` | `"openwebui_monitoring_2024"` | no | +| cpu | CPU units for Grafana task | `number` | `1024` | no | +| memory | Memory (MB) for Grafana task | `number` | `2048` | no | +| enable_autoscaling | Enable ECS autoscaling for Grafana | `bool` | `true` | no | + +See [variables.tf](./variables.tf) for complete list of inputs. 
+ +## Outputs + +| Name | Description | +|------|-------------| +| grafana_dashboard_url | Grafana dashboard URL | +| grafana_admin_credentials | Grafana admin login credentials (sensitive) | +| otlp_endpoints | OpenTelemetry OTLP endpoints (gRPC and HTTP) | +| security_group_id | Security group ID for Grafana tasks | +| setup_instructions | Complete setup and integration instructions | + +See [outputs.tf](./outputs.tf) for complete list of outputs. + +## Integration with Applications + +To send telemetry data from your applications to this Grafana instance: + +### 1. Add Application Security Groups + +```hcl +module "grafana_monitoring" { + source = "./modules/grafana-otel" + # ... other configuration + + otlp_sources_security_group_ids = [ + aws_security_group.my_app.id, + aws_security_group.another_app.id + ] +} +``` + +### 2. Configure Application Environment Variables + +```bash +# In your application environment +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-monitor.my-namespace:4317 +OTEL_EXPORTER_OTLP_INSECURE=true +OTEL_SERVICE_NAME=my-application +``` + +### 3. Verify Integration + +```bash +# Check service discovery +nslookup otel-monitor.my-namespace + +# Test OTLP HTTP endpoint (the gRPC port 4317 cannot be probed with plain curl) +curl -i -X POST -H "Content-Type: application/json" -d '{}' http://otel-monitor.my-namespace:4318/v1/traces + +# Access Grafana UI +curl http://otel-monitor.my-namespace:3000 +``` + +## Monitoring and Troubleshooting + +### Access Grafana Dashboard + +1. Connect to your VPC (via VPN or bastion host) +2. Navigate to the Grafana URL from module outputs +3. Login with the admin credentials +4. 
Explore pre-configured data sources:
+   - **Prometheus**: Metrics and monitoring
+   - **Tempo**: Distributed tracing
+   - **Loki**: Log aggregation
+
+### Common Issues
+
+- **Connection refused**: Check security group rules and CIDR blocks
+- **Service not starting**: Check CloudWatch logs and ECS service events
+- **No telemetry data**: Verify OTLP source security groups and endpoints
+
+### Useful Commands
+
+```bash
+# Check ECS service status
+aws ecs describe-services --cluster my-cluster --services grafana-otel
+
+# View logs
+aws logs tail /ecs/grafana-otel --follow
+
+# Check service discovery
+aws servicediscovery list-services --filters Name=NAMESPACE_ID,Values=ns-12345678
+```
+
+## Security Considerations
+
+- Grafana admin password is configurable but stored in Terraform state
+- Consider using AWS Secrets Manager for production passwords
+- Network access is controlled via security groups and CIDR blocks
+- ECS tasks run with least privilege IAM permissions
+
+## Cost Optimization
+
+- Default configuration uses 1 vCPU and 2GB RAM (estimated $35-50/month)
+- Enable autoscaling to handle traffic spikes efficiently
+- Adjust log retention period to control CloudWatch costs
+- Consider using Spot instances for non-production environments
+
+## License
+
+This module is part of the OpenWebUI infrastructure project.
diff --git a/iac/modules/grafana-otel/main.tf b/iac/modules/grafana-otel/main.tf
new file mode 100644
index 0000000000..1cb504bcf8
--- /dev/null
+++ b/iac/modules/grafana-otel/main.tf
@@ -0,0 +1,420 @@
+# Local values for consistent naming
+locals {
+  name_prefix = var.name_prefix
+  common_tags = merge(
+    {
+      Environment = var.environment
+      Module      = "grafana-otel"
+      ManagedBy   = "terraform"
+    },
+    var.tags
+  )
+
+  # True when the caller supplied an existing Cloud Map namespace.
+  use_existing_namespace = var.service_discovery_namespace_id != ""
+
+  # Namespace ID/name resolved once so resources and outputs cannot drift apart.
+  # With an existing namespace the name comes from var.service_discovery_namespace_name,
+  # which the caller must set to that namespace's real name.
+  namespace_id   = local.use_existing_namespace ? var.service_discovery_namespace_id : aws_service_discovery_private_dns_namespace.grafana[0].id
+  namespace_name = local.use_existing_namespace ? var.service_discovery_namespace_name : aws_service_discovery_private_dns_namespace.grafana[0].name
+
+  # Private DNS host name that service discovery registers for the Grafana tasks.
+  service_host = "${var.service_name}.${local.namespace_name}"
+}
+
+# Service Discovery Namespace (create if not provided)
+resource "aws_service_discovery_private_dns_namespace" "grafana" {
+  count = local.use_existing_namespace ? 0 : 1
+
+  name = var.service_discovery_namespace_name
+  vpc  = var.vpc_id
+
+  description = "Service discovery namespace for Grafana OTEL monitoring"
+
+  tags = merge(local.common_tags, {
+    Name = "${local.name_prefix}-namespace"
+  })
+}
+
+
+# Service Discovery Service for Grafana
+resource "aws_service_discovery_service" "grafana" {
+  name = var.service_name
+
+  dns_config {
+    namespace_id = local.namespace_id
+
+    dns_records {
+      ttl  = 60
+      type = "A"
+    }
+
+    routing_policy = "MULTIVALUE"
+  }
+
+  health_check_custom_config {
+    failure_threshold = 1
+  }
+
+  description = "Grafana OTEL LGTM monitoring stack service discovery"
+
+  tags = merge(local.common_tags, {
+    Name = "${local.name_prefix}-service-discovery"
+  })
+}
+
+# CloudWatch Log Group for Grafana
+resource "aws_cloudwatch_log_group" "grafana" {
+  name              = "/ecs/${local.name_prefix}"
+  retention_in_days = var.log_retention_days
+
+  tags = merge(local.common_tags, {
+    Name = "${local.name_prefix}-logs"
+  })
+}
+
+# Security Group for Grafana ECS Tasks
+resource "aws_security_group" "grafana" {
+  name_prefix = "${local.name_prefix}-"
+  vpc_id      = var.vpc_id
+  description = "Security group for Grafana OTEL ECS tasks"
+
+  # Allow Grafana UI access from specified CIDR blocks
+  ingress {
+    description = "Grafana UI access"
+    from_port   = 3000
+    to_port     = 3000
+    protocol    = "tcp"
+    cidr_blocks = var.allowed_cidr_blocks
+  }
+
+  # Allow OTLP gRPC from specified security groups
+  dynamic "ingress" {
+    for_each = length(var.otlp_sources_security_group_ids) > 0 ? [1] : []
+    content {
+      description     = "OTLP gRPC from sources"
+      from_port       = 4317
+      to_port         = 4317
+      protocol        = "tcp"
+      security_groups = var.otlp_sources_security_group_ids
+    }
+  }
+
+  # Allow OTLP HTTP from specified security groups
+  dynamic "ingress" {
+    for_each = length(var.otlp_sources_security_group_ids) > 0 ? [1] : []
+    content {
+      description     = "OTLP HTTP from sources"
+      from_port       = 4318
+      to_port         = 4318
+      protocol        = "tcp"
+      security_groups = var.otlp_sources_security_group_ids
+    }
+  }
+
+  # Allow all outbound traffic
+  egress {
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+
+  tags = merge(local.common_tags, {
+    Name = "${local.name_prefix}-security-group"
+  })
+
+  # name_prefix gives each replacement a unique name, so the new group can be
+  # created before the old one (possibly still attached to tasks) is destroyed.
+  lifecycle {
+    create_before_destroy = true
+  }
+}
+
+# IAM Role for ECS Task Execution
+resource "aws_iam_role" "grafana_execution" {
+  name = "${local.name_prefix}-execution-role"
+
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Action = "sts:AssumeRole"
+        Effect = "Allow"
+        Principal = {
+          Service = "ecs-tasks.amazonaws.com"
+        }
+      }
+    ]
+  })
+
+  tags = merge(local.common_tags, {
+    Name = "${local.name_prefix}-execution-role"
+  })
+}
+
+# Attach AWS managed ECS execution policy
+resource "aws_iam_role_policy_attachment" "grafana_execution_policy" {
+  role       = aws_iam_role.grafana_execution.name
+  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
+}
+
+# Additional policy for CloudWatch logs
+resource "aws_iam_role_policy" "grafana_logs_policy" {
+  name = "${local.name_prefix}-logs-policy"
+  role = aws_iam_role.grafana_execution.id
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Effect = "Allow"
+        Action = [
+          "logs:CreateLogGroup",
+          "logs:CreateLogStream",
+          "logs:PutLogEvents"
+        ]
+        Resource = "${aws_cloudwatch_log_group.grafana.arn}:*"
+      }
+    ]
+  })
+}
+
+# IAM Role assumed by the running containers (task role).
+# Kept separate from the execution role so the application does not inherit
+# image-pull/log permissions, and so ECS Exec permissions can be granted to it.
+resource "aws_iam_role" "grafana_task" {
+  name = "${local.name_prefix}-task-role"
+
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Action = "sts:AssumeRole"
+        Effect = "Allow"
+        Principal = {
+          Service = "ecs-tasks.amazonaws.com"
+        }
+      }
+    ]
+  })
+
+  tags = merge(local.common_tags, {
+    Name = "${local.name_prefix}-task-role"
+  })
+}
+
+# ECS Exec needs the task role to open SSM message channels; only granted
+# when enable_execute_command is set.
+resource "aws_iam_role_policy" "grafana_task_exec" {
+  count = var.enable_execute_command ? 1 : 0
+
+  name = "${local.name_prefix}-ecs-exec-policy"
+  role = aws_iam_role.grafana_task.id
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Effect = "Allow"
+        Action = [
+          "ssmmessages:CreateControlChannel",
+          "ssmmessages:CreateDataChannel",
+          "ssmmessages:OpenControlChannel",
+          "ssmmessages:OpenDataChannel"
+        ]
+        Resource = "*"
+      }
+    ]
+  })
+}
+
+# ECS Task Definition for Grafana OTEL LGTM
+resource "aws_ecs_task_definition" "grafana" {
+  family                   = local.name_prefix
+  network_mode             = "awsvpc"
+  requires_compatibilities = ["FARGATE"]
+  cpu                      = var.cpu
+  memory                   = var.memory
+  execution_role_arn       = aws_iam_role.grafana_execution.arn
+  task_role_arn            = aws_iam_role.grafana_task.arn
+
+  container_definitions = jsonencode([
+    {
+      name      = "grafana-otel-lgtm"
+      image     = var.container_image
+      cpu       = 0
+      essential = true
+
+      portMappings = [
+        {
+          containerPort = 3000
+          hostPort      = 3000
+          protocol      = "tcp"
+          name          = "grafana-ui"
+          appProtocol   = "http"
+        },
+        {
+          containerPort = 4317
+          hostPort      = 4317
+          protocol      = "tcp"
+          name          = "otlp-grpc"
+        },
+        {
+          containerPort = 4318
+          hostPort      = 4318
+          protocol      = "tcp"
+          name          = "otlp-http"
+          appProtocol   = "http"
+        }
+      ]
+
+      # NOTE(review): the admin password is passed as a plaintext environment
+      # variable and is readable in the task definition; prefer `secrets` backed
+      # by Secrets Manager or SSM Parameter Store for production.
+      environment = [
+        {
+          name  = "GF_SECURITY_ADMIN_PASSWORD"
+          value = var.grafana_admin_password
+        },
+        {
+          name  = "GF_SECURITY_ADMIN_USER"
+          value = var.grafana_admin_user
+        },
+        {
+          name  = "GF_INSTALL_PLUGINS"
+          value = ""
+        },
+        {
+          name  = "GF_FEATURE_TOGGLES_ENABLE"
+          value = "traceqlEditor"
+        }
+      ]
+
+      logConfiguration = {
+        logDriver = "awslogs"
+        options = {
+          "awslogs-group"         = aws_cloudwatch_log_group.grafana.name
+          "mode"                  = "non-blocking"
+          "awslogs-create-group"  = "true"
+          "max-buffer-size"       = "25m"
+          "awslogs-region"        = var.aws_region
+          "awslogs-stream-prefix" = "grafana"
+        }
+      }
+
+      healthCheck = {
+        command = [
+          "CMD-SHELL",
+          "curl --silent --fail http://localhost:3000/api/health || exit 1"
+        ]
+        interval    = 30
+        timeout     = 10
+        retries     = 3
+        startPeriod = 60
+      }
+
+      systemControls = []
+    }
+  ])
+
+  tags = merge(local.common_tags, {
+    Name = "${local.name_prefix}-task-definition"
+  })
+}
+
+# ECS Service for Grafana
+resource "aws_ecs_service" "grafana" {
+  name            = local.name_prefix
+  cluster         = var.cluster_name
+  task_definition = aws_ecs_task_definition.grafana.arn
+  desired_count   = var.desired_count
+
+  # The provider ignores `triggers` unless force_new_deployment is enabled.
+  force_new_deployment = true
+
+  triggers = {
+    redeployment = sha1(jsonencode(aws_ecs_task_definition.grafana.container_definitions))
+  }
+
+  capacity_provider_strategy {
+    capacity_provider = "FARGATE"
+    weight            = 1
+    base              = 0
+  }
+
+  platform_version = "LATEST"
+
+  deployment_maximum_percent         = 200
+  deployment_minimum_healthy_percent = 100
+
+  deployment_circuit_breaker {
+    enable   = true
+    rollback = false
+  }
+
+  network_configuration {
+    subnets = var.private_subnet_ids
+    security_groups = concat(
+      [aws_security_group.grafana.id],
+      var.additional_security_group_ids
+    )
+    assign_public_ip = false
+  }
+
+  service_registries {
+    registry_arn = aws_service_discovery_service.grafana.arn
+  }
+
+  deployment_controller {
+    type = "ECS"
+  }
+
+  enable_execute_command = var.enable_execute_command
+
+  tags = merge(local.common_tags, {
+    Name = "${local.name_prefix}-service"
+  })
+
+  lifecycle {
+    ignore_changes = [desired_count]
+  }
+}
+
+# Auto Scaling Target (if enabled)
+resource "aws_appautoscaling_target" "grafana" {
+  count = var.enable_autoscaling ? 1 : 0
+
+  max_capacity       = var.max_capacity
+  min_capacity       = var.min_capacity
+  resource_id        = "service/${var.cluster_name}/${aws_ecs_service.grafana.name}"
+  scalable_dimension = "ecs:service:DesiredCount"
+  service_namespace  = "ecs"
+
+  depends_on = [aws_ecs_service.grafana]
+
+  tags = local.common_tags
+}
+
+# Auto Scaling Policy (if enabled)
+resource "aws_appautoscaling_policy" "grafana_scale_up" {
+  count = var.enable_autoscaling ? 1 : 0
+
+  name               = "${local.name_prefix}-scale-up"
+  policy_type        = "TargetTrackingScaling"
+  resource_id        = aws_appautoscaling_target.grafana[0].resource_id
+  scalable_dimension = aws_appautoscaling_target.grafana[0].scalable_dimension
+  service_namespace  = aws_appautoscaling_target.grafana[0].service_namespace
+
+  target_tracking_scaling_policy_configuration {
+    target_value = var.cpu_target_value
+
+    predefined_metric_specification {
+      predefined_metric_type = "ECSServiceAverageCPUUtilization"
+    }
+
+    scale_out_cooldown = 600 # 10 minutes
+    scale_in_cooldown  = 300 # 5 minutes
+  }
+}
diff --git a/iac/modules/grafana-otel/outputs.tf b/iac/modules/grafana-otel/outputs.tf
new file mode 100644
index 0000000000..9368320896
--- /dev/null
+++ b/iac/modules/grafana-otel/outputs.tf
@@ -0,0 +1,116 @@
+# Service Information
+output "service_name" {
+  description = "Name of the Grafana ECS service"
+  value       = aws_ecs_service.grafana.name
+}
+
+output "service_arn" {
+  description = "ARN of the Grafana ECS service"
+  value       = aws_ecs_service.grafana.id
+}
+
+output "task_definition_arn" {
+  description = "ARN of the Grafana task definition"
+  value       = aws_ecs_task_definition.grafana.arn
+}
+
+# Access Information
+output "grafana_dashboard_url" {
+  description = "Grafana dashboard URL (accessible from allowed CIDR blocks)"
+  value       = "http://${local.service_host}:3000"
+}
+
+output "grafana_admin_credentials" {
+  description = "Grafana admin login credentials"
+  value = {
+    username = var.grafana_admin_user
+    password = var.grafana_admin_password
+  }
+  sensitive = true
+}
+
+output "otlp_endpoints" {
+  description = "OpenTelemetry OTLP endpoints for telemetry data"
+  value = {
+    grpc = "http://${local.service_host}:4317"
+    http = "http://${local.service_host}:4318"
+  }
+}
+
+# Service Discovery Information
+output "service_discovery_namespace_id" {
+  description = "Service discovery namespace ID"
+  value       = local.namespace_id
+}
+
+output "service_discovery_namespace_name" {
+  description = "Service discovery namespace name"
+  value       = local.namespace_name
+}
+
+output "service_discovery_service_arn" {
+  description = "Service discovery service ARN"
+  value       = aws_service_discovery_service.grafana.arn
+}
+
+# Security Information
+output "security_group_id" {
+  description = "Security group ID for Grafana tasks"
+  value       = aws_security_group.grafana.id
+}
+
+output "execution_role_arn" {
+  description = "IAM execution role ARN for Grafana tasks"
+  value       = aws_iam_role.grafana_execution.arn
+}
+
+# Monitoring Information
+output "cloudwatch_log_group_name" {
+  description = "CloudWatch log group name for Grafana logs"
+  value       = aws_cloudwatch_log_group.grafana.name
+}
+
+output "cloudwatch_log_group_arn" {
+  description = "CloudWatch log group ARN for Grafana logs"
+  value       = aws_cloudwatch_log_group.grafana.arn
+}
+
+# Setup Instructions
+# The admin password is intentionally left out: interpolating the sensitive
+# variable into this non-sensitive output makes Terraform reject the configuration.
+output "setup_instructions" {
+  description = "Instructions for accessing and configuring Grafana monitoring"
+  value       = <<-EOT
+
+    === GRAFANA OTEL MONITORING SETUP ===
+
+    1. VERIFICATION COMMANDS (run from within VPC):
+       nslookup ${local.service_host}
+       curl http://${local.service_host}:3000
+
+    2. GRAFANA ACCESS:
+       URL: http://${local.service_host}:3000
+       Username: ${var.grafana_admin_user}
+       Password: see the sensitive "grafana_admin_credentials" output
+
+    3. OPENTELEMETRY ENDPOINTS:
+       - OTLP gRPC: http://${local.service_host}:4317
+       - OTLP HTTP: http://${local.service_host}:4318
+
+    4. MONITORING DATA SOURCES:
+       - Prometheus: Pre-configured for metrics
+       - Tempo: Pre-configured for distributed traces
+       - Loki: Pre-configured for logs aggregation
+
+    5. TROUBLESHOOTING:
+       - Check ECS service status: aws ecs describe-services --cluster ${var.cluster_name} --services ${local.name_prefix}
+       - View Grafana logs: aws logs tail ${aws_cloudwatch_log_group.grafana.name} --follow
+       - Test connectivity from application security groups
+
+    6. INTEGRATION WITH APPLICATIONS:
+       To send telemetry data to this Grafana instance, configure your applications with:
+       - OTEL_EXPORTER_OTLP_ENDPOINT: http://${local.service_host}:4317
+       - Ensure your application security groups are added to otlp_sources_security_group_ids
+
+  EOT
+}
diff --git a/iac/modules/grafana-otel/variables.tf b/iac/modules/grafana-otel/variables.tf
new file mode 100644
index 0000000000..f972492aee
--- /dev/null
+++ b/iac/modules/grafana-otel/variables.tf
@@ -0,0 +1,159 @@
+# Core Infrastructure Variables
+variable "aws_region" {
+  description = "AWS region for deployment"
+  type        = string
+  default     = "us-east-1"
+}
+
+variable "environment" {
+  description = "Environment name (e.g., production, staging, dev)"
+  type        = string
+  default     = "production"
+}
+
+variable "name_prefix" {
+  description = "Prefix for all resource names"
+  type        = string
+  default     = "grafana-otel"
+}
+
+# Network Configuration
+variable "vpc_id" {
+  description = "VPC ID where Grafana will be deployed"
+  type        = string
+}
+
+variable "private_subnet_ids" {
+  description = "Private subnet IDs for Grafana ECS tasks"
+  type        = list(string)
+}
+
+variable "allowed_cidr_blocks" {
+  description = "CIDR blocks allowed to access Grafana UI (port 3000)"
+  type        = list(string)
+  default     = ["10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"]
+}
+
+# ECS Configuration
+variable "cluster_name" {
+  description = "ECS cluster name where Grafana will be deployed"
+  type        = string
+}
+
+# NOTE(review): `latest` is a floating tag; pin a version for reproducible deployments.
+variable "container_image" {
+  description = "Grafana OTEL LGTM container image"
+  type        = string
+  default     = "grafana/otel-lgtm:latest"
+}
+
+variable "cpu" {
+  description = "CPU units for Grafana task"
+  type        = number
+  default     = 1024
+}
+
+variable "memory" {
+  description = "Memory (MB) for Grafana task"
+  type        = number
+  default     = 2048
+}
+
+variable "desired_count" {
+  description = "Desired number of Grafana tasks"
+  type        = number
+  default     = 1
+}
+
+# Grafana Configuration
+variable "grafana_admin_user" {
+  description = "Grafana admin username"
+  type        = string
+  default     = "admin"
+}
+
+# NOTE(review): the default below is a shared password committed to source control,
+# kept only for backward compatibility; always override it for real deployments.
+variable "grafana_admin_password" {
+  description = "Grafana admin password"
+  type        = string
+  default     = "openwebui_monitoring_2024"
+  sensitive   = true
+}
+
+# Service Discovery Configuration
+variable "service_discovery_namespace_id" {
+  description = "Service discovery namespace ID (if using existing namespace)"
+  type        = string
+  default     = ""
+}
+
+variable "service_discovery_namespace_name" {
+  description = "Service discovery namespace name (creates new if namespace_id not provided)"
+  type        = string
+  default     = "grafana-monitoring"
+}
+
+variable "service_name" {
+  description = "Service discovery service name"
+  type        = string
+  default     = "otel-monitor"
+}
+
+# Monitoring Configuration
+variable "log_retention_days" {
+  description = "CloudWatch log retention in days"
+  type        = number
+  default     = 7
+}
+
+variable "enable_autoscaling" {
+  description = "Enable ECS autoscaling for Grafana"
+  type        = bool
+  default     = true
+}
+
+variable "max_capacity" {
+  description = "Maximum number of tasks for autoscaling"
+  type        = number
+  default     = 2
+}
+
+variable "min_capacity" {
+  description = "Minimum number of tasks for autoscaling"
+  type        = number
+  default     = 1
+}
+
+variable "cpu_target_value" {
+  description = "Target CPU utilization for autoscaling"
+  type        = number
+  default     = 80.0
+}
+
+# Security Configuration
+variable "additional_security_group_ids" {
+  description = "Additional security group IDs to attach to Grafana tasks"
+  type        = list(string)
+  default     = []
+}
+
+variable "enable_execute_command" {
+  description = "Enable ECS execute command for debugging"
+  type        = bool
+  default     = true
+}
+
+# OpenTelemetry Configuration
+variable "otlp_sources_security_group_ids" {
+  description = "Security group IDs that should be allowed to send OTLP data to Grafana"
+  type        = list(string)
+  default     = []
+}
+
+# Tags
+variable "tags" {
+  description = "Additional tags for all resources"
+  type        = map(string)
+  default     = {}
+}
diff --git a/iac/modules/grafana-otel/versions.tf b/iac/modules/grafana-otel/versions.tf
new file mode 100644
index 0000000000..ddfcb0e054
--- /dev/null
+++ b/iac/modules/grafana-otel/versions.tf
@@ -0,0 +1,10 @@
+terraform {
+  required_version = ">= 1.0"
+
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = ">= 5.0"
+    }
+  }
+}