awk sits between grep and a full programming language. It’s perfect for columnar data, log files, and quick text transformations.

The Basic Pattern

awk 'pattern { action }' file

If pattern matches, run action. No pattern means every line. No action means print.

# Print everything
awk '{ print }' file.txt

# Print lines matching pattern
awk '/error/' file.txt

# Print second column
awk '{ print $2 }' file.txt

# Combined: errors, show timestamp and message
awk '/error/ { print $1, $4 }' app.log

Field Handling

awk splits lines into fields by whitespace (default):

# $0 = entire line
# $1 = first field
# $2 = second field
# NF = number of fields
# $NF = last field

echo "alice 30 engineer" | awk '{ print $1 }'
# Output: alice

echo "alice 30 engineer" | awk '{ print $NF }'
# Output: engineer

echo "one two three four" | awk '{ print $(NF-1) }'
# Output: three

Custom Delimiters

# CSV (comma-separated)
awk -F',' '{ print $2 }' data.csv

# Colon-separated (like /etc/passwd)
awk -F':' '{ print $1, $3 }' /etc/passwd

# Multiple delimiters
awk -F'[,;:]' '{ print $1 }' mixed.txt

# Tab-separated
awk -F'\t' '{ print $1 }' data.tsv

Built-in Variables

| Variable | Meaning |
|----------|---------|
| `$0` | Entire line |
| `$1`, `$2`... | Fields |
| `NF` | Number of fields |
| `NR` | Current line number |
| `FNR` | Line number in current file |
| `FS` | Field separator (input) |
| `OFS` | Output field separator |
| `RS` | Record separator |
| `ORS` | Output record separator |
| `FILENAME` | Current filename |
# Print with line numbers
awk '{ print NR, $0 }' file.txt

# Print lines 10-20
awk 'NR >= 10 && NR <= 20' file.txt

# Different output separator
awk -F',' 'BEGIN { OFS="\t" } { print $1, $2 }' data.csv

Patterns and Conditions

# Regex match
awk '/pattern/' file.txt

# Negated match
awk '!/pattern/' file.txt

# Field matches pattern
awk '$3 ~ /error/' file.txt

# Numeric comparisons
awk '$2 > 100' data.txt

# Multiple conditions
awk '$2 > 100 && $3 == "active"' data.txt

# Range (from start to end pattern)
awk '/START/,/END/' file.txt

BEGIN and END Blocks

# Print header and footer
awk 'BEGIN { print "=== Report ===" } 
     { print } 
     END { print "=== End ===" }' data.txt

# Count lines
awk 'END { print NR }' file.txt

# Sum a column
awk '{ sum += $2 } END { print "Total:", sum }' data.txt

# Average
awk '{ sum += $2; count++ } END { print "Avg:", sum/count }' data.txt

String Functions

# Length
awk '{ print length($1) }' file.txt

# Substring (1-indexed)
awk '{ print substr($1, 1, 3) }' file.txt

# Split into array
awk '{ split($0, arr, ":"); print arr[1] }' file.txt

# Replace
awk '{ gsub(/old/, "new"); print }' file.txt

# Uppercase/lowercase
awk '{ print toupper($1) }' file.txt
awk '{ print tolower($1) }' file.txt

Printf for Formatting

# Formatted output
awk '{ printf "%-20s %10.2f\n", $1, $2 }' data.txt

# Right-aligned columns
awk '{ printf "%10s %10s %10s\n", $1, $2, $3 }' data.txt

# Padding with zeros
awk '{ printf "%05d\n", $1 }' numbers.txt

Arrays (Associative)

# Count occurrences
awk '{ count[$1]++ } END { for (k in count) print k, count[k] }' file.txt

# Group and sum
awk '{ total[$1] += $2 } END { for (k in total) print k, total[k] }' sales.txt

# Store lines by key
awk '{ lines[$1] = $0 } END { for (k in lines) print lines[k] }' file.txt

Log Analysis Examples

Apache/Nginx Access Logs

# Count requests per IP
awk '{ count[$1]++ } END { for (ip in count) print count[ip], ip }' access.log | sort -rn | head

# Count by status code
awk '{ count[$9]++ } END { for (code in count) print code, count[code] }' access.log

# Requests per hour
awk -F'[\\[:]' '{ print $2":"$3 }' access.log | sort | uniq -c

# Slow requests (>1s) — assumes the request time is logged as the last field
awk '$NF > 1.0 { print }' access.log

# Bandwidth by path
awk '{ bw[$7] += $10 } END { for (path in bw) print bw[path], path }' access.log | sort -rn | head

Application Logs

# Extract errors with timestamp
awk '/ERROR/ { print $1, $2, $0 }' app.log

# Count errors by type (three-argument match() is a gawk extension, not POSIX awk)
awk '/ERROR/ { 
  match($0, /ERROR: ([^:]+)/, arr)
  count[arr[1]]++ 
} END { 
  for (e in count) print count[e], e 
}' app.log | sort -rn

# Time range filter
awk '$2 >= "10:00:00" && $2 <= "11:00:00"' app.log

System Logs

# Failed SSH attempts
awk '/Failed password/ { print $(NF-3) }' /var/log/auth.log | sort | uniq -c | sort -rn

# Disk usage per mount point
df -h | awk 'NR>1 { print $5, $6 }'

# Process memory usage
ps aux | awk 'NR>1 { mem[$1] += $4 } END { for (u in mem) print mem[u], u }' | sort -rn

Data Transformation

CSV to JSON

awk -F',' 'NR==1 { 
  for (i=1; i<=NF; i++) header[i]=$i 
  next 
} 
{ 
  printf "{"
  for (i=1; i<=NF; i++) {
    printf "\"%s\":\"%s\"", header[i], $i
    if (i<NF) printf ","
  }
  print "}"
}' data.csv

Join Fields

# Combine fields with delimiter
awk '{ print $1 "-" $2 "-" $3 }' file.txt

# Join all fields
awk '{ $1=$1; print }' OFS=',' file.txt

Transpose Columns

awk '{ 
  for (i=1; i<=NF; i++) a[NR,i]=$i 
} 
END { 
  for (j=1; j<=NF; j++) { 
    for (i=1; i<=NR; i++) printf "%s ", a[i,j]
    print "" 
  } 
}' file.txt

Control Flow

# If/else
awk '{ 
  if ($2 > 100) 
    print $1, "high"
  else 
    print $1, "low" 
}' data.txt

# Ternary
awk '{ print $1, ($2 > 100 ? "high" : "low") }' data.txt

# Skip lines
awk 'NR <= 5 { next } { print }' file.txt  # Skip first 5

# Exit early
awk '/STOP/ { exit } { print }' file.txt

Multi-file Processing

# Process multiple files, track which file
awk '{ print FILENAME, $0 }' file1.txt file2.txt

# Reset counter per file
awk 'FNR == 1 { print "---", FILENAME, "---" } { print }' *.txt

# Compare files: print lines of file2 whose first field appears in file1
awk 'NR==FNR { a[$1]; next } $1 in a' file1.txt file2.txt

One-Liners Reference

# Remove duplicate lines (preserves order)
awk '!seen[$0]++' file.txt

# Print unique values in column
awk '{ print $1 }' file.txt | sort -u

# Sum numbers in file
awk '{ sum += $1 } END { print sum }' numbers.txt

# Number non-blank lines
awk 'NF { print ++n, $0 }' file.txt

# Reverse fields
awk '{ for (i=NF; i>0; i--) printf "%s ", $i; print "" }' file.txt

# Remove blank lines
awk 'NF' file.txt

# Print between markers
awk '/START/,/END/' file.txt

# Add line numbers
awk '{ print NR": "$0 }' file.txt

Combining with Other Tools

# Find + awk
find . -name "*.log" -exec awk '/ERROR/ { print FILENAME, $0 }' {} +

# Pipe chain
awk '{ print $1 }' access.log | sort | uniq -c | sort -rn | head

# With xargs
awk '{ print $1 }' urls.txt | xargs -I{} curl -s {}

awk turns “I need to write a script for this” into a one-liner. Learn the patterns, use them forever.