awk is a programming language disguised as a command-line tool. It processes text line by line, splitting each into fields. Most tasks need just one line.
The Basics#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
| # Print entire line
awk '{print}' file.txt
# Print specific field (space-delimited)
awk '{print $1}' file.txt # First field
awk '{print $2}' file.txt # Second field
awk '{print $NF}' file.txt # Last field
awk '{print $(NF-1)}' file.txt # Second to last
# Print multiple fields
awk '{print $1, $3}' file.txt
# Custom output format
awk '{print $1 " -> " $2}' file.txt
|
Field Separators#
1
2
3
4
5
6
7
8
9
10
11
| # Colon-separated (like /etc/passwd)
awk -F: '{print $1}' /etc/passwd
# Tab-separated
awk -F'\t' '{print $2}' data.tsv
# Multiple separators
awk -F'[,;]' '{print $1}' file.txt
# Set output separator
awk -F: 'BEGIN{OFS=","} {print $1,$3}' /etc/passwd
|
Filtering Lines#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
| # Lines matching pattern
awk '/error/' logfile.txt
# Lines NOT matching pattern
awk '!/debug/' logfile.txt
# Field matches value
awk '$3 == "ERROR"' logfile.txt
# Numeric comparison
awk '$2 > 100' data.txt
# Multiple conditions
awk '$2 > 100 && $3 == "active"' data.txt
# Line number range
awk 'NR >= 10 && NR <= 20' file.txt
|
Built-in Variables#
1
2
3
4
5
6
7
| NR # Current line number (total)
NF # Number of fields in current line
FNR # Line number in current file
FS # Field separator (input)
OFS # Output field separator
RS # Record separator (default: newline)
ORS # Output record separator
|
1
2
3
4
5
6
7
8
| # Print line numbers
awk '{print NR, $0}' file.txt
# Print lines with more than 3 fields
awk 'NF > 3' file.txt
# Print total lines at end
awk 'END{print NR}' file.txt
|
Arithmetic#
1
2
3
4
5
6
7
8
9
10
11
| # Sum a column
awk '{sum += $2} END{print sum}' data.txt
# Average
awk '{sum += $2; count++} END{print sum/count}' data.txt
# Min/Max
awk 'NR==1{min=max=$2} $2>max{max=$2} $2<min{min=$2} END{print min, max}' data.txt
# Calculate percentage
awk '{print $1, $2, ($2/$3)*100 "%"}' data.txt
|
String Operations#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
| # Length of field
awk '{print length($1)}' file.txt
# Substring
awk '{print substr($1, 1, 3)}' file.txt # First 3 chars
# Convert case
awk '{print toupper($1)}' file.txt
awk '{print tolower($1)}' file.txt
# String concatenation
awk '{print $1 $2}' file.txt # No space
awk '{print $1 " " $2}' file.txt # With space
# Split string
awk '{split($1, arr, "-"); print arr[1]}' file.txt
|
Conditional Logic#
1
2
3
4
5
6
7
8
9
10
11
12
13
| # If-else
awk '{if ($2 > 100) print "high"; else print "low"}' data.txt
# Ternary
awk '{print ($2 > 100 ? "high" : "low")}' data.txt
# Multiple conditions
awk '{
if ($2 > 100) status = "high"
else if ($2 > 50) status = "medium"
else status = "low"
print $1, status
}' data.txt
|
BEGIN and END#
1
2
3
4
5
| # Header and footer
awk 'BEGIN{print "Name\tScore"} {print $1"\t"$2} END{print "---\nTotal: " NR}' data.txt
# Initialize variables
awk 'BEGIN{count=0} /error/{count++} END{print count " errors"}' logfile.txt
|
Practical One-Liners#
Log Analysis#
1
2
3
4
5
6
7
8
9
10
11
| # Count occurrences of each status code
awk '{print $9}' access.log | sort | uniq -c | sort -rn
# Or all in awk
awk '{count[$9]++} END{for (code in count) print count[code], code}' access.log
# Requests per IP
awk '{count[$1]++} END{for (ip in count) print count[ip], ip}' access.log | sort -rn | head
# Slow requests (response time > 1s)
awk '$NF > 1.0 {print $7, $NF}' access.log
|
CSV Processing#
1
2
3
4
5
6
7
8
9
10
11
| # Print specific columns
awk -F, '{print $1","$3}' data.csv
# Skip header
awk -F, 'NR > 1 {print $2}' data.csv
# Sum a column
awk -F, 'NR > 1 {sum += $3} END{print sum}' data.csv
# Filter by value
awk -F, '$4 == "active"' data.csv
|
System Administration#
1
2
3
4
5
6
7
8
9
10
11
| # Disk usage over 80%
df -h | awk '$5+0 > 80 {print $6, $5}'
# Memory by process
ps aux | awk '{mem[$11] += $6} END{for (proc in mem) print mem[proc], proc}' | sort -rn | head
# Users with bash shell
awk -F: '$7 ~ /bash/ {print $1}' /etc/passwd
# Show listening ports
netstat -tlnp | awk '$6 == "LISTEN" {print $4}'
|
1
2
3
4
5
6
7
8
9
10
11
| # Transpose rows to columns
awk '{for (i=1; i<=NF; i++) a[i,NR]=$i} END{for (i=1; i<=NF; i++) {for (j=1; j<=NR; j++) printf a[i,j] " "; print ""}}' file.txt
# Remove duplicate lines (preserving order)
awk '!seen[$0]++' file.txt
# Print unique values from column
awk '{print $2}' file.txt | awk '!seen[$0]++'
# Join lines with comma
awk '{printf "%s%s", sep, $0; sep=","} END{print ""}' file.txt
|
Text Manipulation#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
| # Remove blank lines
awk 'NF' file.txt
# Normalize whitespace (strips leading/trailing AND collapses internal runs — $1=$1 rebuilds the record with OFS)
awk '{$1=$1}1' file.txt
# Replace field value
awk '{$2 = "REDACTED"; print}' file.txt
# Add line numbers
awk '{print NR": "$0}' file.txt
# Print every Nth line
awk 'NR % 5 == 0' file.txt
|
1
2
3
4
5
6
7
8
| # Filter then process
grep "ERROR" logfile.txt | awk '{print $5}'
# Process then sort
awk -F: '{print $3, $1}' /etc/passwd | sort -n
# Use in pipeline
cat data.txt | awk '{print $2}' | sort | uniq -c
|
Multi-line Scripts#
For complex logic, use a script file:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
| #!/usr/bin/awk -f
# script.awk
BEGIN {
FS = ","
print "Processing..."
}
NR == 1 {
# Save header
for (i = 1; i <= NF; i++) header[i] = $i
next
}
$4 > 100 {
# Process qualifying rows
total += $4
count++
print $1, $2, $4
}
END {
print "---"
print "Count:", count
print "Total:", total
print "Average:", total/count
}
|
1
| awk -f script.awk data.csv
|
Quick Reference#
| Task | Command |
|---|---|
| Print column | awk '{print $N}' |
| Filter rows | awk '/pattern/' |
| Sum column | awk '{s+=$N}END{print s}' |
| Count lines | awk 'END{print NR}' |
| Custom delimiter | awk -F',' |
| Field equals | awk '$N == "value"' |
| Skip header | awk 'NR > 1' |
| Unique lines | awk '!seen[$0]++' |
| Last field | awk '{print $NF}' |
awk’s learning curve is shallow for simple tasks and deep for complex ones. Start with '{print $N}' and filtering, then add arithmetic and conditionals as needed. Most text processing tasks that seem complex become simple one-liners once you know the patterns.
๐ฌ Get the Newsletter
Weekly insights on DevOps, automation, and CLI mastery. No spam, unsubscribe anytime.