Infrastructure code deserves the same testing rigor as application code. A typo in Terraform can delete a database. An untested Ansible role can break production. Let’s build confidence with proper testing.
The Testing Pyramid for Infrastructure (top to bottom): E2E Tests (Full stack deployment); Integration Tests (Real cloud resources); Unit Tests (Static analysis, plan validation). Unit Testing: Static Analysis Terraform Validation 1 2 3 4 5 6 7 8 # Built-in validation terraform init terraform validate terraform fmt -check # Custom validation rules terraform plan -out=tfplan terraform show -json tfplan > plan.json 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 # tests/test_terraform_plan.py import json import pytest @pytest.fixture def plan(): with open('plan.json') as f: return json.load(f) def test_no_resources_destroyed(plan): """Ensure no resources are being destroyed.""" changes = plan.get('resource_changes', []) destroyed = [c for c in changes if 'delete' in c.get('change', {}).get('actions', [])] assert len(destroyed) == 0, f"Resources being destroyed: {[d['address'] for d in destroyed]}" def test_no_public_s3_buckets(plan): """Ensure S3 buckets aren't public.""" changes = plan.get('resource_changes', []) for change in changes: if change['type'] == 'aws_s3_bucket': after = change.get('change', {}).get('after', {}) acl = after.get('acl', 'private') assert acl == 'private', f"Bucket {change['address']} has public ACL: {acl}" def test_instances_have_tags(plan): """Ensure EC2 instances have required tags.""" required_tags = {'Environment', 'Owner', 'Project'} changes = plan.get('resource_changes', []) for change in changes: if change['type'] == 'aws_instance': after = change.get('change', {}).get('after', {}) tags = set(after.get('tags', {}).keys()) missing = required_tags - tags assert not missing, f"Instance {change['address']} missing tags: {missing}" Policy as Code with OPA 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 # policy/terraform.rego package terraform deny[msg] { resource := input.resource_changes[_] resource.type == 
"aws_security_group_rule" resource.change.after.cidr_blocks[_] == "0.0.0.0/0" resource.change.after.from_port == 22 msg := sprintf("SSH open to world in %v", [resource.address]) } deny[msg] { resource := input.resource_changes[_] resource.type == "aws_db_instance" resource.change.after.publicly_accessible == true msg := sprintf("RDS instance %v is publicly accessible", [resource.address]) } 1 2 # Run OPA checks terraform show -json tfplan | opa eval -i - -d policy/ "data.terraform.deny" Integration Testing with Terratest Terratest deploys real infrastructure, validates it, then tears it down:
...