awk is a programming language disguised as a command-line tool. It processes text line by line, splitting each line into fields. Most tasks need just one line of awk.
## The Basics

```sh
# Print entire line
awk '{print}' file.txt

# Print specific field (space-delimited)
awk '{print $1}' file.txt       # First field
awk '{print $2}' file.txt       # Second field
awk '{print $NF}' file.txt      # Last field
awk '{print $(NF-1)}' file.txt  # Second to last

# Print multiple fields
awk '{print $1, $3}' file.txt

# Custom output format
awk '{print $1 " -> " $2}' file.txt
```

## Field Separators

```sh
# Colon-separated (like /etc/passwd)
awk -F: '{print $1}' /etc/passwd

# Tab-separated
awk -F'\t' '{print $2}' data.tsv

# Multiple separators
awk -F'[,;]' '{print $1}' file.txt

# Set output separator
awk -F: 'BEGIN{OFS=","} {print $1,$3}' /etc/passwd
```

## Filtering Lines

```sh
# Lines matching pattern
awk '/error/' logfile.txt

# Lines NOT matching pattern
awk '!/debug/' logfile.txt

# Field matches value
awk '$3 == "ERROR"' logfile.txt

# Numeric comparison
awk '$2 > 100' data.txt

# Multiple conditions
awk '$2 > 100 && $3 == "active"' data.txt

# Line number range
awk 'NR >= 10 && NR <= 20' file.txt
```

## Built-in Variables

```sh
NR   # Current line number (total)
NF   # Number of fields in current line
FNR  # Line number in current file
FS   # Field separator (input)
OFS  # Output field separator
RS   # Record separator (default: newline)
ORS  # Output record separator
```

```sh
# Print line numbers
awk '{print NR, $0}' file.txt

# Print lines with more than 3 fields
awk 'NF > 3' file.txt

# Print total lines at end
awk 'END{print NR}' file.txt
```

## Arithmetic

```sh
# Sum a column
awk '{sum += $2} END{print sum}' data.txt

# Average
awk '{sum += $2; count++} END{print sum/count}' data.txt

# Min/Max
awk 'NR==1{min=max=$2} $2>max{max=$2} $2<min{min=$2} END{print min, max}' data.txt

# Calculate percentage
awk '{print $1, $2, ($2/$3)*100 "%"}' data.txt
```

## String Operations

```sh
# Length of field
awk '{print length($1)}' file.txt

# Substring
awk '{print substr($1, 1, 3)}' file.txt  # First 3 chars

# Convert case
awk '{print toupper($1)}' file.txt
awk '{print tolower($1)}' file.txt

# String concatenation
awk '{print $1 $2}' file.txt      # No space
awk '{print $1 " " $2}' file.txt  # With space

# Split string
awk '{split($1, arr, "-"); print arr[1]}' file.txt
```

## Conditional Logic

```sh
# If-else
awk '{if ($2 > 100) print "high"; else print "low"}' data.txt

# Ternary
awk '{print ($2 > 100 ? "high" : "low")}' data.txt

# Multiple conditions
awk '{
  if ($2 > 100) status = "high"
  else if ($2 > 50) status = "medium"
  else status = "low"
  print $1, status
}' data.txt
```

## BEGIN and END

```sh
# Header and footer
awk 'BEGIN{print "Name\tScore"} {print $1"\t"$2} END{print "---\nTotal: " NR}' data.txt

# Initialize variables
awk 'BEGIN{count=0} /error/{count++} END{print count " errors"}' logfile.txt
```

## Practical One-Liners

### Log Analysis

```sh
# Count occurrences of each status code
awk '{print $9}' access.log | sort | uniq -c | sort -rn

# Or all in awk
awk '{count[$9]++} END{for (code in count) print count[code], code}' access.log

# Requests per IP
awk '{count[$1]++} END{for (ip in count) print count[ip], ip}' access.log | sort -rn | head

# Slow requests (response time > 1s)
awk '$NF > 1.0 {print $7, $NF}' access.log
```

### CSV Processing

```sh
# Print specific columns
awk -F, '{print $1","$3}' data.csv

# Skip header
awk -F, 'NR > 1 {print $2}' data.csv

# Sum a column
awk -F, 'NR > 1 {sum += $3} END{print sum}' data.csv

# Filter by value
awk -F, '$4 == "active"' data.csv
```

### System Administration

```sh
# Disk usage over 80%
df -h | awk '$5+0 > 80 {print $6, $5}'

# Memory by process
ps aux | awk '{mem[$11] += $6} END{for (proc in mem) print mem[proc], proc}' | sort -rn | head

# Users with bash shell
awk -F: '$7 ~ /bash/ {print $1}' /etc/passwd

# Show listening ports
netstat -tlnp | awk '$6 == "LISTEN" {print $4}'
```

### Data Transformation

```sh
# Transpose rows to columns
awk '{for (i=1; i<=NF; i++) a[i,NR]=$i} END{for (i=1; i<=NF; i++) {for (j=1; j<=NR; j++) printf a[i,j] " "; print ""}}' file.txt

# Remove duplicate lines (preserving order)
awk '!seen[$0]++' file.txt

# Print unique values from column
awk '{print $2}' file.txt | awk '!seen[$0]++'

# Join lines with comma
awk '{printf "%s%s", sep, $0; sep=","} END{print ""}' file.txt
```

### Text Manipulation

```sh
# Remove blank lines
awk 'NF' file.txt

# Remove leading/trailing whitespace
awk '{$1=$1}1' file.txt

# Replace field value
awk '{$2 = "REDACTED"; print}' file.txt

# Add line numbers
awk '{print NR": "$0}' file.txt

# Print every Nth line
awk 'NR % 5 == 0' file.txt
```

### Combining with Other Tools

```sh
# Filter then process
grep "ERROR" logfile.txt | awk '{print $5}'

# Process then sort
awk -F: '{print $3, $1}' /etc/passwd | sort -n

# Use in pipeline
cat data.txt | awk '{print $2}' | sort | uniq -c
```

## Multi-line Scripts

For complex logic, use a script file:
...