grep, awk, and sed are the foundational text processing tools in Unix. They’re old, they’re cryptic, and they’re incredibly powerful once you learn them.
grep: Search and Filter#
grep searches for patterns in text.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
| # Basic search
grep "error" logfile.txt
# Case insensitive
grep -i "error" logfile.txt
# Show line numbers
grep -n "error" logfile.txt
# Count matches
grep -c "error" logfile.txt
# Invert (lines NOT matching)
grep -v "debug" logfile.txt
# Recursive search
grep -r "TODO" ./src/
# Only filenames
grep -l "password" *.conf
|
Regex with grep#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
| # Extended regex (-E or egrep)
grep -E "error|warning|critical" logfile.txt
# Word boundary
grep -w "fail" logfile.txt # Matches "fail" not "failure"
# Line start/end
grep "^Error" logfile.txt # Lines starting with Error
grep "done$" logfile.txt # Lines ending with done
# Any character
grep "user.name" logfile.txt # user1name, user_name, user-name (. matches exactly one character, so "username" does NOT match)
# Character classes
grep "[0-9]" logfile.txt # Lines with digits
grep "[A-Za-z]" logfile.txt # Lines with letters
|
Context#
1
2
3
4
5
6
7
8
| # Lines before match
grep -B 3 "error" logfile.txt
# Lines after match
grep -A 3 "error" logfile.txt
# Lines before and after
grep -C 3 "error" logfile.txt
|
Real Examples#
1
2
3
4
5
6
7
8
9
10
11
| # Find IP addresses
grep -E "\b[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\b" access.log
# Find function definitions
grep -n "^def \|^function " *.py *.js
# Exclude directories
grep -r "config" . --exclude-dir={node_modules,.git}
# Find files NOT containing pattern
grep -L "copyright" *.py
|
sed: Stream Editor#
sed transforms text line by line.
Substitution#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
| # Replace first occurrence per line
sed 's/old/new/' file.txt
# Replace all occurrences
sed 's/old/new/g' file.txt
# Case insensitive
sed 's/old/new/gi' file.txt
# In-place edit (modifies file)
sed -i 's/old/new/g' file.txt
# Backup before in-place edit
sed -i.bak 's/old/new/g' file.txt
|
Delimiters#
1
2
3
| # Use different delimiter for paths
sed 's|/usr/local|/opt|g' file.txt
sed 's#http://#https://#g' file.txt
|
Line Operations#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
| # Delete lines matching pattern
sed '/pattern/d' file.txt
# Delete line 5
sed '5d' file.txt
# Delete lines 5-10
sed '5,10d' file.txt
# Delete blank lines
sed '/^$/d' file.txt
# Print only matching lines (like grep)
sed -n '/pattern/p' file.txt
|
Insert and Append#
1
2
3
4
5
6
7
8
| # Insert before line 3
sed '3i\New line here' file.txt
# Append after line 3
sed '3a\New line here' file.txt
# Insert before pattern match
sed '/pattern/i\Inserted line' file.txt
|
Multiple Operations#
1
2
3
4
5
| # Multiple substitutions
sed -e 's/foo/bar/g' -e 's/baz/qux/g' file.txt
# Or with semicolons
sed 's/foo/bar/g; s/baz/qux/g' file.txt
|
Real Examples#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
| # Remove HTML tags
sed 's/<[^>]*>//g' page.html
# Extract between patterns
sed -n '/START/,/END/p' file.txt
# Add line numbers
sed = file.txt | sed 'N;s/\n/\t/'
# Convert DOS to Unix line endings
sed 's/\r$//' file.txt
# Remove leading whitespace
sed 's/^[ \t]*//' file.txt
# Remove trailing whitespace
sed 's/[ \t]*$//' file.txt
|
awk: Pattern Scanning and Processing#
awk is a full programming language for text processing.
Basics#
1
2
3
4
5
6
7
8
9
10
11
| # Print specific columns (fields)
awk '{print $1}' file.txt # First column
awk '{print $1, $3}' file.txt # First and third
awk '{print $NF}' file.txt # Last column
# Custom delimiter
awk -F: '{print $1}' /etc/passwd # Colon-separated
awk -F, '{print $2}' data.csv # CSV
# Print with formatting
awk '{printf "%-10s %s\n", $1, $2}' file.txt
|
Patterns#
1
2
3
4
5
6
7
8
9
| # Filter by pattern
awk '/error/ {print}' logfile.txt
# Filter by field value
awk '$3 > 100 {print}' data.txt
awk '$1 == "admin" {print}' users.txt
# Multiple conditions
awk '$3 > 100 && $4 == "active" {print}' data.txt
|
Built-in Variables#
| Variable | Meaning |
|---|---|
| $0 | Entire line |
| $1, $2... | Fields |
| NF | Number of fields |
| NR | Current line (record) number |
| FS | Field separator |
| OFS | Output field separator |
1
2
3
4
5
6
7
8
| # Line numbers
awk '{print NR, $0}' file.txt
# Last field
awk '{print $NF}' file.txt
# Second-to-last
awk '{print $(NF-1)}' file.txt
|
BEGIN and END#
1
2
3
4
5
6
7
8
9
10
11
| # Header and footer
awk 'BEGIN {print "Name\tScore"} {print $1, $2} END {print "Done"}' file.txt
# Sum a column
awk '{sum += $2} END {print "Total:", sum}' numbers.txt
# Average
awk '{sum += $2; count++} END {print "Avg:", sum/count}' numbers.txt
# Count occurrences
awk '{count[$1]++} END {for (k in count) print k, count[k]}' file.txt
|
String Functions#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
| # Length
awk '{print length($1)}' file.txt
# Substring
awk '{print substr($1, 1, 3)}' file.txt
# Split
awk '{split($0, a, ":"); print a[1]}' file.txt
# Regex match
awk '$1 ~ /^[0-9]+$/ {print}' file.txt
# Substitute
awk '{gsub(/old/, "new"); print}' file.txt
|
Real Examples#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
| # Sum file sizes
ls -l | awk '{sum += $5} END {print sum/1024/1024 " MB"}'
# Parse Apache logs
awk '{print $1}' access.log | sort | uniq -c | sort -rn | head
# CSV processing
awk -F, 'NR > 1 {sum += $3} END {print "Total:", sum}' sales.csv
# Find longest line
awk '{if (length > max) {max = length; longest = $0}} END {print longest}' file.txt
# Transpose columns
awk '{for (i=1; i<=NF; i++) a[i,NR]=$i} END {for (i=1; i<=NF; i++) {for (j=1; j<=NR; j++) printf a[i,j] " "; print ""}}' file.txt
|
The real power comes from pipelines:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
| # Find error counts by type
grep "ERROR" app.log | awk '{print $4}' | sort | uniq -c | sort -rn
# Extract and transform
cat data.csv | grep -v "^#" | awk -F, '{print $1, $3}' | sed 's/  */ /g'
# Find top memory users
ps aux | awk '{print $4, $11}' | sort -rn | head -10
# Parse JSON-ish logs
grep "request_id" app.log | sed 's/.*request_id":"\([^"]*\)".*/\1/' | sort -u
# Count HTTP status codes
awk '{print $9}' access.log | sort | uniq -c | sort -rn
|
Quick Reference#
grep#
1
2
3
4
5
6
7
| grep -i # Case insensitive
grep -v # Invert match
grep -n # Line numbers
grep -c # Count
grep -r # Recursive
grep -E # Extended regex
grep -A/B/C N # Context lines
|
sed#
1
2
3
4
5
| sed 's/a/b/' # Replace first
sed 's/a/b/g' # Replace all
sed -i # In-place
sed '/p/d' # Delete matching
sed -n '/p/p' # Print matching
|
awk#
1
2
3
4
5
| awk '{print $1}' # First field
awk -F: # Set delimiter
awk '/pat/ {action}' # Pattern match
awk 'NR > 1' # Skip header
awk '{sum+=$1} END {print sum}' # Sum
|
These three tools handle 90% of text processing tasks. Learn them well and you’ll solve problems in seconds that would take minutes in a “real” programming language.
📬 Get the Newsletter
Weekly insights on DevOps, automation, and CLI mastery. No spam, unsubscribe anytime.