awk sits between grep and a full programming language. It’s perfect for columnar data, log files, and quick text transformations.
The Basic Pattern#
1
| awk 'pattern { action }' file
|
If pattern matches, run action. No pattern means every line. No action means print.
1
2
3
4
5
6
7
8
9
10
11
| # Print everything
awk '{ print }' file.txt
# Print lines matching pattern
awk '/error/' file.txt
# Print second column
awk '{ print $2 }' file.txt
# Combined: errors, show timestamp and message
awk '/error/ { print $1, $4 }' app.log
|
Field Handling#
awk splits lines into fields by whitespace (default):
1
2
3
4
5
6
7
8
9
10
11
12
13
14
| # $0 = entire line
# $1 = first field
# $2 = second field
# NF = number of fields
# $NF = last field
echo "alice 30 engineer" | awk '{ print $1 }'
# Output: alice
echo "alice 30 engineer" | awk '{ print $NF }'
# Output: engineer
echo "one two three four" | awk '{ print $(NF-1) }'
# Output: three
|
Custom Delimiters#
1
2
3
4
5
6
7
8
9
10
11
| # CSV (comma-separated)
awk -F',' '{ print $2 }' data.csv
# Colon-separated (like /etc/passwd)
awk -F':' '{ print $1, $3 }' /etc/passwd
# Multiple delimiters
awk -F'[,;:]' '{ print $1 }' mixed.txt
# Tab-separated
awk -F'\t' '{ print $1 }' data.tsv
|
Built-in Variables#
| Variable | Meaning |
|---|---|
| $0 | Entire line |
| $1, $2... | Fields |
| NF | Number of fields |
| NR | Current line number |
| FNR | Line number in current file |
| FS | Field separator (input) |
| OFS | Output field separator |
| RS | Record separator |
| ORS | Output record separator |
| FILENAME | Current filename |
1
2
3
4
5
6
7
8
| # Print with line numbers
awk '{ print NR, $0 }' file.txt
# Print lines 10-20
awk 'NR >= 10 && NR <= 20' file.txt
# Different output separator
awk -F',' 'BEGIN { OFS="\t" } { print $1, $2 }' data.csv
|
Patterns and Conditions#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
| # Regex match
awk '/pattern/' file.txt
# Negated match
awk '!/pattern/' file.txt
# Field matches pattern
awk '$3 ~ /error/' file.txt
# Numeric comparisons
awk '$2 > 100' data.txt
# Multiple conditions
awk '$2 > 100 && $3 == "active"' data.txt
# Range (from start to end pattern)
awk '/START/,/END/' file.txt
|
BEGIN and END Blocks#
1
2
3
4
5
6
7
8
9
10
11
12
13
| # Print header and footer
awk 'BEGIN { print "=== Report ===" }
{ print }
END { print "=== End ===" }' data.txt
# Count lines
awk 'END { print NR }' file.txt
# Sum a column
awk '{ sum += $2 } END { print "Total:", sum }' data.txt
# Average
awk '{ sum += $2; count++ } END { print "Avg:", sum/count }' data.txt
|
String Functions#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
| # Length
awk '{ print length($1) }' file.txt
# Substring (1-indexed)
awk '{ print substr($1, 1, 3) }' file.txt
# Split into array
awk '{ split($0, arr, ":"); print arr[1] }' file.txt
# Replace
awk '{ gsub(/old/, "new"); print }' file.txt
# Uppercase/lowercase
awk '{ print toupper($1) }' file.txt
awk '{ print tolower($1) }' file.txt
|
1
2
3
4
5
6
7
8
| # Formatted output
awk '{ printf "%-20s %10.2f\n", $1, $2 }' data.txt
# Right-aligned columns
awk '{ printf "%10s %10s %10s\n", $1, $2, $3 }' data.txt
# Padding with zeros
awk '{ printf "%05d\n", $1 }' numbers.txt
|
Arrays (Associative)#
1
2
3
4
5
6
7
8
| # Count occurrences
awk '{ count[$1]++ } END { for (k in count) print k, count[k] }' file.txt
# Group and sum
awk '{ total[$1] += $2 } END { for (k in total) print k, total[k] }' sales.txt
# Store lines by key
awk '{ lines[$1] = $0 } END { for (k in lines) print lines[k] }' file.txt
|
Log Analysis Examples#
Apache/Nginx Access Logs#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
| # Count requests per IP
awk '{ count[$1]++ } END { for (ip in count) print count[ip], ip }' access.log | sort -rn | head
# Count by status code
awk '{ count[$9]++ } END { for (code in count) print code, count[code] }' access.log
# Requests per hour
awk -F'[\\[:]' '{ print $2":"$3 }' access.log | sort | uniq -c
# Slow requests (>1s)
awk '$NF > 1.0 { print }' access.log
# Bandwidth by path
awk '{ bw[$7] += $10 } END { for (path in bw) print bw[path], path }' access.log | sort -rn | head
|
Application Logs#
1
2
3
4
5
6
7
8
9
10
11
12
13
| # Extract errors with timestamp
awk '/ERROR/ { print $1, $2, $0 }' app.log
# Count errors by type (3-arg match() is a gawk extension; not in mawk/BSD awk)
awk '/ERROR/ {
match($0, /ERROR: ([^:]+)/, arr)
count[arr[1]]++
} END {
for (e in count) print count[e], e
}' app.log | sort -rn
# Time range filter
awk '$2 >= "10:00:00" && $2 <= "11:00:00"' app.log
|
System Logs#
1
2
3
4
5
6
7
8
| # Failed SSH attempts
awk '/Failed password/ { print $(NF-3) }' /var/log/auth.log | sort | uniq -c | sort -rn
# Disk usage per filesystem (prints use% and mount point)
df -h | awk 'NR>1 { print $5, $6 }'
# Process memory usage
ps aux | awk 'NR>1 { mem[$1] += $4 } END { for (u in mem) print mem[u], u }' | sort -rn
|
CSV to JSON#
1
2
3
4
5
6
7
8
9
10
11
12
| awk -F',' 'NR==1 {
for (i=1; i<=NF; i++) header[i]=$i
next
}
{
printf "{"
for (i=1; i<=NF; i++) {
printf "\"%s\":\"%s\"", header[i], $i
if (i<NF) printf ","
}
print "}"
}' data.csv
|
Join Fields#
1
2
3
4
5
| # Combine fields with delimiter
awk '{ print $1 "-" $2 "-" $3 }' file.txt
# Join all fields
awk '{ $1=$1; print }' OFS=',' file.txt
|
Transpose Columns#
1
2
3
4
5
6
7
8
9
| awk '{
for (i=1; i<=NF; i++) a[NR,i]=$i
}
END {
for (j=1; j<=NF; j++) {
for (i=1; i<=NR; i++) printf "%s ", a[i,j]
print ""
}
}' file.txt
|
Control Flow#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
| # If/else
awk '{
if ($2 > 100)
print $1, "high"
else
print $1, "low"
}' data.txt
# Ternary
awk '{ print $1, ($2 > 100 ? "high" : "low") }' data.txt
# Skip lines
awk 'NR <= 5 { next } { print }' file.txt # Skip first 5
# Exit early
awk '/STOP/ { exit } { print }' file.txt
|
Multi-file Processing#
1
2
3
4
5
6
7
8
| # Process multiple files, track which file
awk '{ print FILENAME, $0 }' file1.txt file2.txt
# Reset counter per file
awk 'FNR == 1 { print "---", FILENAME, "---" } { print }' *.txt
# Compare files
awk 'NR==FNR { a[$1]; next } $1 in a' file1.txt file2.txt
|
One-Liners Reference#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
| # Remove duplicate lines (preserves order)
awk '!seen[$0]++' file.txt
# Print unique values in column
awk '{ print $1 }' file.txt | sort -u
# Sum numbers in file
awk '{ sum += $1 } END { print sum }' numbers.txt
# Number non-blank lines
awk 'NF { print ++n, $0 }' file.txt
# Reverse fields
awk '{ for (i=NF; i>0; i--) printf "%s ", $i; print "" }' file.txt
# Remove blank lines
awk 'NF' file.txt
# Print between markers
awk '/START/,/END/' file.txt
# Add line numbers
awk '{ print NR": "$0 }' file.txt
|
1
2
3
4
5
6
7
8
| # Find + awk
find . -name "*.log" -exec awk '/ERROR/ { print FILENAME, $0 }' {} +
# Pipe chain
cat access.log | awk '{ print $1 }' | sort | uniq -c | sort -rn | head
# With xargs
awk '{ print $1 }' urls.txt | xargs -I{} curl -s {}
|
awk turns “I need to write a script for this” into a one-liner. Learn the patterns, use them forever.
📬 Get the Newsletter
Weekly insights on DevOps, automation, and CLI mastery. No spam, unsubscribe anytime.