Skip to main content
DevOps & Cloud — Documented

terraform-architect

Terraform and Terragrunt IaC architect. Module design, remote state, multi-environment configs, CI/CD for infrastructure, provider version management, and production patterns for AWS, GCP, and Azure.

Share:

Installation

npx clawhub@latest install terraform-architect

View the full skill documentation and source below.

Documentation

Terraform Infrastructure as Code Architect

Terraform Mental Model

terraform init    → download providers, set up backend
terraform plan    → show what WILL change (never destructive)
terraform apply   → make it so
terraform destroy → nuclear option

State is everything. State = Terraform's view of what it created. Protect it.


Project Structure

Single Environment (Simple)

├── main.tf          # Resources
├── variables.tf     # Input declarations
├── outputs.tf       # Output declarations
├── versions.tf      # Provider + terraform version constraints
├── terraform.tfvars # Variable values (git-ignored for secrets)
└── terraform.tfvars.example

Multi-Environment (Production Pattern)

├── modules/
│   ├── vpc/
│   │   ├── main.tf
│   │   ├── variables.tf
│   │   └── outputs.tf
│   ├── eks/
│   └── rds/
├── environments/
│   ├── dev/
│   │   ├── main.tf
│   │   ├── variables.tf
│   │   └── backend.tf
│   ├── staging/
│   └── prod/
└── .terraform.lock.hcl  # Lock file — ALWAYS commit this

Terragrunt Structure (Best for large orgs)

├── terragrunt.hcl          # Root config — remote state, common vars
├── modules/                # Reusable modules
│   ├── vpc/
│   └── eks/
└── live/
    ├── dev/
    │   ├── vpc/
    │   │   └── terragrunt.hcl  # Thin config, DRY
    │   └── eks/
    │       └── terragrunt.hcl
    ├── staging/
    └── prod/

versions.tf — Always Pin Versions

# versions.tf — pin the core CLI range and every provider so all machines
# (and CI) resolve identical versions via .terraform.lock.hcl.
terraform {
  # Any 1.x release from 1.6 upward; excludes a future 2.0 with breaking changes.
  required_version = ">= 1.6.0, < 2.0.0"
  
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"  # Minor updates only, no major
    }
    kubernetes = {
      source  = "hashicorp/kubernetes"
      version = "~> 2.23"  # "~>" allows 2.24, 2.25, ... but never 3.0
    }
    random = {
      source  = "hashicorp/random"
      version = "~> 3.5"
    }
  }
}

Remote State (Required for Teams)

# backend.tf — S3 + DynamoDB for AWS
# NOTE: backend blocks cannot contain variables or expressions — every value
# below must be a literal (or supplied via `terraform init -backend-config`).
terraform {
  backend "s3" {
    bucket         = "mycompany-terraform-state"
    key            = "production/myapp/terraform.tfstate"
    region         = "us-east-1"
    encrypt        = true                          # SSE-S3
    kms_key_id     = "arn:aws:kms:..."             # Optional KMS
    dynamodb_table = "terraform-state-lock"       # Prevents concurrent applies
    
    # Workspaces need no extra config: the S3 backend automatically stores
    # non-default workspaces under "env:/<WORKSPACE>/<key>" — do NOT try to
    # interpolate ${terraform.workspace} here; backend config forbids expressions.
    # NOTE(review): Terraform >= 1.10 can lock via S3 itself (use_lockfile = true),
    # which supersedes the DynamoDB table — confirm your minimum version first.
  }
}

# Create the state bucket and lock table ONCE, manually or with a bootstrap module.
resource "aws_s3_bucket" "terraform_state" {
  bucket = "mycompany-terraform-state"
  
  lifecycle {
    prevent_destroy = true  # Never accidentally delete
  }
}

# State files contain secrets in plain text — block every form of public access.
resource "aws_s3_bucket_public_access_block" "state" {
  bucket = aws_s3_bucket.terraform_state.id

  block_public_acls       = true
  block_public_policy     = true
  ignore_public_acls      = true
  restrict_public_buckets = true
}

# Encrypt at rest even if a consumer forgets `encrypt = true` in its backend config.
resource "aws_s3_bucket_server_side_encryption_configuration" "state" {
  bucket = aws_s3_bucket.terraform_state.id

  rule {
    apply_server_side_encryption_by_default {
      sse_algorithm = "AES256"  # SSE-S3; swap for "aws:kms" + key if KMS is required
    }
  }
}

resource "aws_s3_bucket_versioning" "state" {
  bucket = aws_s3_bucket.terraform_state.id
  versioning_configuration {
    status = "Enabled"  # Essential for state recovery
  }
}

# Lock table: the S3 backend writes a "LockID" item while an apply is running.
resource "aws_dynamodb_table" "terraform_lock" {
  name         = "terraform-state-lock"
  billing_mode = "PAY_PER_REQUEST"  # Locking traffic is tiny — no provisioned capacity
  hash_key     = "LockID"
  
  attribute {
    name = "LockID"
    type = "S"
  }
}

Writing Good Modules

Module Structure

# modules/rds/variables.tf
# Declares every input the module's main.tf references — the original omitted
# environment/storage_gb/kms_key_arn/database_name/master_username, which makes
# `terraform validate` fail on the resources that use them.
variable "identifier" {
  description = "Unique identifier for this RDS instance"
  type        = string
}

variable "instance_class" {
  description = "RDS instance type"
  type        = string
  default     = "db.t3.micro"
  
  validation {
    condition     = can(regex("^db\\.", var.instance_class))
    error_message = "instance_class must start with 'db.'"
  }
}

variable "environment" {
  description = "Deployment environment; drives backup retention, deletion protection, and snapshot behavior"
  type        = string
}

variable "storage_gb" {
  description = "Initial allocated storage in GiB (storage auto-scales up to 4x this value)"
  type        = number
  default     = 20
}

variable "kms_key_arn" {
  description = "KMS key ARN for storage encryption (null uses the AWS-managed key)"
  type        = string
  default     = null
}

variable "database_name" {
  description = "Name of the initial database created on the instance"
  type        = string
}

variable "master_username" {
  description = "Master username for the database"
  type        = string
  default     = "postgres"
}

variable "tags" {
  description = "Tags to apply to all resources"
  type        = map(string)
  default     = {}
}
# modules/rds/main.tf
# Production-grade Postgres instance: encrypted storage, env-dependent backup
# and protection settings, enhanced monitoring, and Performance Insights.
resource "aws_db_instance" "main" {
  identifier = var.identifier
  
  engine         = "postgres"
  engine_version = "16.1"  # pinned — upgrades are deliberate, not accidental
  instance_class = var.instance_class
  
  allocated_storage     = var.storage_gb
  max_allocated_storage = var.storage_gb * 4  # Auto-scale up
  storage_type          = "gp3"
  storage_encrypted     = true  # Always encrypt
  kms_key_id            = var.kms_key_arn
  
  db_name  = var.database_name
  username = var.master_username
  # NOTE(review): any value assigned to `password` is persisted in plain text in
  # the state file — remote state must be encrypted and access-controlled.
  # Newer AWS provider versions offer manage_master_user_password as an
  # alternative; confirm provider version before adopting.
  password = random_password.master.result
  
  vpc_security_group_ids = [aws_security_group.rds.id]
  db_subnet_group_name   = aws_db_subnet_group.main.name
  
  # Prod keeps 30 days of backups; everything else keeps a week.
  backup_retention_period = var.environment == "prod" ? 30 : 7
  backup_window           = "03:00-04:00"
  maintenance_window      = "sun:04:00-sun:05:00"
  
  # Prod: refuse deletion and always take a final snapshot. Non-prod: tear down freely.
  deletion_protection = var.environment == "prod"
  skip_final_snapshot = var.environment != "prod"
  final_snapshot_identifier = var.environment == "prod" ? "${var.identifier}-final" : null
  
  # Enable enhanced monitoring
  monitoring_interval = 60
  monitoring_role_arn = aws_iam_role.rds_monitoring.arn
  
  # Enable Performance Insights
  performance_insights_enabled          = true
  performance_insights_retention_period = 7
  
  # Caller tags plus the module's own identifying tags (module wins on conflict).
  tags = merge(var.tags, {
    Name        = var.identifier
    Environment = var.environment
    Terraform   = "true"
  })
}

# Random password — never hardcode
resource "random_password" "master" {
  length           = 32
  special          = true
  override_special = "!#$%&*()-_=+[]{}<>:?"
}

# Store in Secrets Manager so applications fetch credentials at runtime
# instead of reading Terraform outputs/state.
resource "aws_secretsmanager_secret" "rds_password" {
  name = "/${var.environment}/${var.identifier}/db-password"
}

# Full connection bundle (not just the password) so consumers need one lookup.
resource "aws_secretsmanager_secret_version" "rds_password" {
  secret_id = aws_secretsmanager_secret.rds_password.id
  secret_string = jsonencode({
    username = var.master_username
    password = random_password.master.result
    host     = aws_db_instance.main.address
    port     = aws_db_instance.main.port
    dbname   = var.database_name
  })
}
# modules/rds/outputs.tf
# Every output carries a description — consumers see these in `terraform output`
# and module documentation.
output "endpoint" {
  description = "RDS connection endpoint"
  value       = aws_db_instance.main.address
}

output "port" {
  description = "Port the database instance listens on"
  value       = aws_db_instance.main.port
}

output "secret_arn" {
  description = "ARN of the Secrets Manager secret containing credentials"
  value       = aws_secretsmanager_secret.rds_password.arn
}

Variables and Locals

# variables.tf
variable "environment" {
  description = "Deployment environment"
  type        = string
  
  # Fail fast at plan time instead of creating misnamed resources.
  validation {
    condition     = contains(["dev", "staging", "prod"], var.environment)
    error_message = "environment must be dev, staging, or prod"
  }
}

variable "region" {
  type    = string
  default = "us-east-1"
}

# locals.tf — computed values and DRY helpers
locals {
  # Common tags applied to ALL resources
  common_tags = {
    Project     = "myapp"
    Environment = var.environment
    ManagedBy   = "terraform"
    Owner       = "platform-team"
    CostCenter  = "engineering"
  }
  
  # Environment-specific sizes — look up with local.instance_sizes[var.environment]
  instance_sizes = {
    dev     = "t3.micro"
    staging = "t3.small"
    prod    = "m5.large"
  }
  
  # Single source of truth for "are we in prod?" conditionals
  is_prod = var.environment == "prod"
  
  # Naming convention
  name_prefix = "myapp-${var.environment}"
}

Data Sources and Imports

# Reference existing resources without managing them
data "aws_vpc" "main" {
  # Looks the VPC up by its Name tag — fails if zero or multiple VPCs match.
  filter {
    name   = "tag:Name"
    values = ["${var.environment}-vpc"]
  }
}

data "aws_subnets" "private" {
  filter {
    name   = "vpc-id"
    values = [data.aws_vpc.main.id]
  }
  
  tags = {
    Tier = "private"
  }
}

# Identity/region of the credentials in use — handy for ARNs and policies.
data "aws_caller_identity" "current" {}
data "aws_region" "current" {}

# Use in resources
resource "aws_security_group" "app" {
  vpc_id = data.aws_vpc.main.id
  # ...
}

# Import existing resource into state (Terraform 1.5+).
# NOTE: `import` is a TOP-LEVEL block in a .tf file — it does NOT go inside
# the `terraform {}` block. Remove it after the first successful apply.
import {
  to = aws_s3_bucket.existing
  id = "my-existing-bucket-name"
}

For_Each and Dynamic Blocks

# Create multiple resources from a map — map keys give each instance a stable
# address (aws_ecs_service.services["api"]), unlike count's fragile indices.
variable "services" {
  type = map(object({
    port  = number
    image = string
  }))
  default = {
    api = { port = 8080, image = "api:1.0" }
    web = { port = 3000, image = "web:1.0" }
  }
}

resource "aws_ecs_service" "services" {
  for_each = var.services
  
  name = "${local.name_prefix}-${each.key}"
  # each.key = "api" or "web"
  # each.value.port = 8080 or 3000
}

# Dynamic blocks — avoid repetition
resource "aws_security_group" "app" {
  name   = "${local.name_prefix}-app"
  vpc_id = data.aws_vpc.main.id
  
  # One ingress block per entry in var.allowed_ports; inside `content`,
  # the iterator is named after the block label ("ingress").
  dynamic "ingress" {
    for_each = var.allowed_ports
    content {
      from_port   = ingress.value
      to_port     = ingress.value
      protocol    = "tcp"
      cidr_blocks = var.allowed_cidrs
    }
  }
}

Lifecycle and Dependencies

resource "aws_instance" "app" {
  ami           = data.aws_ami.app.id
  instance_type = "t3.micro"
  
  lifecycle {
    # Create replacement before destroying old (zero-downtime)
    create_before_destroy = true
    
    # Prevent accidental destroy. Lifecycle meta-arguments are evaluated before
    # expressions, so ONLY literal values are allowed — `prevent_destroy =
    # local.is_prod` fails with "Variables may not be used here". Vary this per
    # environment with separate root configs, not with locals or variables.
    prevent_destroy = true
    
    # Ignore changes to specific fields (e.g., auto-updated by AWS)
    ignore_changes = [ami, tags["LastDeployment"]]
    
    # Force replacement when the launch template publishes a new version.
    # (Unlike the flags above, resource references ARE allowed here.)
    replace_triggered_by = [aws_launch_template.app.latest_version]
  }
  
  # Explicit dependency when Terraform can't infer it
  depends_on = [aws_iam_role_policy_attachment.app]
}

CI/CD for Infrastructure

GitHub Actions Workflow

# .github/workflows/terraform.yml
# Plan on every PR (with the diff posted as a comment), apply only on push to main.
name: Terraform

on:
  push:
    branches: [main]
    paths: ['infrastructure/**']
  pull_request:
    paths: ['infrastructure/**']

jobs:
  terraform:
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: infrastructure/environments/prod
    
    permissions:
      id-token: write     # For OIDC auth to AWS
      contents: read
      pull-requests: write
    
    steps:
      - uses: actions/checkout@v4
      
      - name: Configure AWS credentials (OIDC - no long-lived keys)
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::123456789:role/github-actions-terraform
          aws-region: us-east-1
      
      - uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: "~> 1.6"
      
      - run: terraform init
      - run: terraform validate
      - run: terraform fmt -check -recursive
      
      # tfsec security scanning
      # (original had a mangled action ref: "aquasecurity/[email protected]")
      - uses: aquasecurity/tfsec-action@v1.0.3
      
      # checkov policy-as-code
      - uses: bridgecrewio/checkov-action@master
        with:
          directory: .
          soft_fail: false
      
      - name: Terraform Plan
        id: plan
        run: terraform plan -out=tfplan -no-color
      
      # Post plan diff as PR comment (setup-terraform's wrapper exposes stdout)
      - uses: actions/github-script@v7
        if: github.event_name == 'pull_request'
        with:
          script: |
            const output = `#### Terraform Plan \`${{ steps.plan.outcome }}\`
            \`\`\`
            ${{ steps.plan.outputs.stdout }}
            \`\`\``;
            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: output
            });
      
      # Only apply on main branch push — applies the exact plan file from above
      - name: Terraform Apply
        if: github.ref == 'refs/heads/main' && github.event_name == 'push'
        run: terraform apply tfplan

Common Patterns and Anti-Patterns

✅ DO

# Use remote state — never commit tfstate
# Use modules for reusable infrastructure
# Use locals for computed values
# Use count/for_each for multiple similar resources
# Tag every resource with environment, project, owner
# Use random_password, never hardcode credentials
# Enable deletion_protection on production databases
# Use data sources to reference existing infrastructure
# Lock provider versions with .terraform.lock.hcl

❌ DON'T

# Don't commit .terraform/ directory
# Don't commit terraform.tfstate or terraform.tfstate.backup  
# Don't hardcode credentials or secrets
# Don't use count when you need stable identity (use for_each with maps)
# Don't apply unreviewed changes — in CI, use -auto-approve only behind an approval gate (protected branch, reviewed plan, or manual approval step)
# Don't ignore the plan output
# Don't use latest provider versions in production

Terragrunt DRY Pattern

# terragrunt.hcl (root)
# Read account- and environment-level settings from .hcl files found by
# walking up the directory tree from each child module.
locals {
  account_vars = read_terragrunt_config(find_in_parent_folders("account.hcl"))
  env_vars     = read_terragrunt_config(find_in_parent_folders("env.hcl"))
  
  account_id  = local.account_vars.locals.aws_account_id
  environment = local.env_vars.locals.environment
}

# Remote state config — DRY across all modules
remote_state {
  backend = "s3"
  config = {
    bucket         = "mycompany-terraform-${local.account_id}"
    # Each module's state lands under its own path, e.g. "dev/vpc/terraform.tfstate"
    key            = "${path_relative_to_include()}/terraform.tfstate"
    region         = "us-east-1"
    encrypt        = true
    dynamodb_table = "terraform-state-lock"
  }
  # Terragrunt writes backend.tf into each module dir so plain Terraform
  # can still be run there; overwrite it on every run to keep it in sync.
  generate = {
    path      = "backend.tf"
    if_exists = "overwrite_terragrunt"
  }
}

# Common inputs for all modules — merged with (and overridable by) child inputs
inputs = {
  environment = local.environment
  tags = {
    ManagedBy   = "terragrunt"
    Environment = local.environment
  }
}
# live/prod/rds/terragrunt.hcl
# Thin leaf config: inherit the root, point at a module, wire in dependencies.
include "root" {
  # NOTE(review): bare find_in_parent_folders() is deprecated in recent
  # Terragrunt releases in favor of an explicit name, e.g.
  # find_in_parent_folders("root.hcl") — confirm your Terragrunt version.
  path = find_in_parent_folders()
}

terraform {
  source = "../../../modules/rds"
}

dependency "vpc" {
  config_path = "../vpc"

  # Placeholder outputs so validate/plan succeed before the VPC has been
  # applied (e.g. in CI or on a fresh account). Never used during apply.
  mock_outputs = {
    vpc_id             = "vpc-00000000"
    private_subnet_ids = ["subnet-00000000"]
  }
  mock_outputs_allowed_terraform_commands = ["validate", "plan"]
}

inputs = {
  vpc_id     = dependency.vpc.outputs.vpc_id
  subnet_ids = dependency.vpc.outputs.private_subnet_ids
  
  identifier     = "myapp-prod"
  instance_class = "db.m5.large"
  storage_gb     = 100
}