grep and find: Search Patterns for the Command Line
Two commands solve 90% of search problems on Unix systems: grep for text patterns and find for file locations. Master these and you’ll navigate any codebase. grep Basics 1 2 3 4 5 6 7 8 9 10 11 12 13 14 # Search for pattern in file grep "error" logfile.txt # Case insensitive grep -i "error" logfile.txt # Show line numbers grep -n "error" logfile.txt # Count matches grep -c "error" logfile.txt # Invert match (lines NOT matching) grep -v "debug" logfile.txt grep in Multiple Files 1 2 3 4 5 6 7 8 9 10 11 # Search all files in directory grep "TODO" *.py # Recursive search grep -r "TODO" src/ # Show only filenames grep -l "TODO" *.py # Show filenames with no match grep -L "TODO" *.py grep with Context 1 2 3 4 5 6 7 8 # 3 lines before match grep -B 3 "error" logfile.txt # 3 lines after match grep -A 3 "error" logfile.txt # 3 lines before and after grep -C 3 "error" logfile.txt grep Regular Expressions 1 2 3 4 5 6 7 8 9 10 11 # Basic regex (default) grep "error.*failed" logfile.txt # Extended regex grep -E "error|warning|critical" logfile.txt # Or use egrep egrep "error|warning" logfile.txt # Perl regex (most powerful) grep -P "\d{4}-\d{2}-\d{2}" logfile.txt Common Patterns 1 2 3 4 5 6 7 8 9 10 11 12 13 14 # IP addresses grep -E "\b[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\b" access.log # Email addresses grep -E "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}" users.txt # URLs grep -E "https?://[^\s]+" document.txt # Timestamps grep -P "\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}" app.log # Word boundaries grep -w "error" logfile.txt # Won't match "errors" or "error_code" grep with Exclusions 1 2 3 4 5 6 7 8 # Exclude directories grep -r "TODO" --exclude-dir=node_modules --exclude-dir=.git . # Exclude file patterns grep -r "TODO" --exclude="*.min.js" --exclude="*.map" . # Include only certain files grep -r "TODO" --include="*.py" --include="*.js" . find Basics 1 2 3 4 5 6 7 8 9 10 11 12 13 14 # Find by name find . -name "*.py" # Case insensitive name find . -iname "readme*" # Find directories find . -type d -name "test*" # Find files find . -type f -name "*.log" # Find links find . -type l find by Time 1 2 3 4 5 6 7 8 9 10 11 12 13 14 # Modified in last 7 days find . -mtime -7 # Modified more than 30 days ago find . -mtime +30 # Modified in last 60 minutes find . -mmin -60 # Accessed in last day find . -atime -1 # Changed (metadata) in last day find . -ctime -1 find by Size 1 2 3 4 5 6 7 8 9 10 11 # Files larger than 100MB find . -size +100M # Files smaller than 1KB find . -size -1k # Files exactly 0 bytes (empty) find . -size 0 # Size units: c (bytes), k (KB), M (MB), G (GB) find . -size +1G find by Permissions 1 2 3 4 5 6 7 8 9 10 11 12 13 14 # Executable files find . -perm /u+x -type f # World-writable files find . -perm -002 # SUID files find . -perm -4000 # Files owned by user find . -user root # Files owned by group find . -group www-data find with Actions 1 2 3 4 5 6 7 8 9 10 11 12 13 14 # Print (default) find . -name "*.log" -print # Delete (careful!) find . -name "*.tmp" -delete # Execute command for each file find . -name "*.py" -exec wc -l {} \; # Execute command with all files at once find . -name "*.py" -exec wc -l {} + # Interactive delete find . -name "*.bak" -ok rm {} \; find Logical Operators 1 2 3 4 5 6 7 8 9 10 11 # AND (implicit) find . -name "*.py" -size +100k # OR find . -name "*.py" -o -name "*.js" # NOT find . ! -name "*.pyc" # Grouping find . \( -name "*.py" -o -name "*.js" \) -size +10k Combining grep and find 1 2 3 4 5 6 7 8 9 10 11 # Find files and grep in them find . -name "*.py" -exec grep -l "import os" {} \; # More efficient with xargs find . -name "*.py" | xargs grep -l "import os" # Handle spaces in filenames find . -name "*.py" -print0 | xargs -0 grep -l "import os" # Find recently modified files with pattern find . -name "*.log" -mtime -1 -exec grep "ERROR" {} + Practical Examples Find Large Files 1 2 3 4 5 # Top 10 largest files find . -type f -exec du -h {} + | sort -rh | head -10 # Files over 100MB, sorted find . -type f -size +100M -exec ls -lh {} \; | sort -k5 -h Find and Clean 1 2 3 4 5 6 7 8 9 # Remove old log files find /var/log -name "*.log" -mtime +30 -delete # Remove empty directories find . -type d -empty -delete # Remove Python cache find . -type d -name "__pycache__" -exec rm -rf {} + find . -name "*.pyc" -delete Search Code 1 2 3 4 5 6 7 8 # Find function definitions grep -rn "def " --include="*.py" src/ # Find TODO comments grep -rn "TODO\|FIXME\|XXX" --include="*.py" . # Find imports grep -r "^import\|^from" --include="*.py" src/ | sort -u Search Logs 1 2 3 4 5 6 7 8 # Errors in last hour find /var/log -name "*.log" -mmin -60 -exec grep -l "ERROR" {} \; # Count errors per file find /var/log -name "*.log" -exec sh -c 'echo "$1: $(grep -c ERROR "$1")"' _ {} \; # Unique IPs from access log grep -oE "\b[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+\b" access.log | sort -u Find Duplicates 1 2 3 4 5 # Find files with same name find . -type f -name "*.py" | xargs -I{} basename {} | sort | uniq -d # Find by content hash (requires md5sum) find . -type f -exec md5sum {} \; | sort | uniq -w32 -d Performance Tips 1 2 3 4 5 6 7 8 9 10 11 # Stop at first match (faster) grep -m 1 "pattern" largefile.txt # Use fixed strings when possible (faster than regex) grep -F "exact string" file.txt # Limit find depth find . -maxdepth 2 -name "*.py" # Prune directories find . -path ./node_modules -prune -o -name "*.js" -print ripgrep (Modern Alternative) If available, rg is faster than grep: ...