Python Patterns for Command-Line Scripts

Python is the go-to language for automation scripts. Here’s how to write CLI tools that are reliable and user-friendly. Basic Script Structure 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 #!/usr/bin/env python3 """One-line description of what this script does.""" import argparse import sys def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('input', help='Input file path') parser.add_argument('-o', '--output', help='Output file path') parser.add_argument('-v', '--verbose', action='store_true') args = parser.parse_args() # Your logic here process(args.input, args.output, args.verbose) if __name__ == '__main__': main() Argument Parsing with argparse Positional Arguments 1 2 3 4 parser.add_argument('filename') # Required parser.add_argument('files', nargs='+') # One or more parser.add_argument('files', nargs='*') # Zero or more parser.add_argument('config', nargs='?') # Optional positional Optional Arguments 1 2 3 4 5 parser.add_argument('-v', '--verbose', action='store_true') parser.add_argument('-q', '--quiet', action='store_false', dest='verbose') parser.add_argument('-n', '--count', type=int, default=10) parser.add_argument('-f', '--format', choices=['json', 'csv', 'table']) parser.add_argument('--config', type=argparse.FileType('r')) Subcommands 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 parser = argparse.ArgumentParser() subparsers = parser.add_subparsers(dest='command', required=True) # 'init' command init_parser = subparsers.add_parser('init', help='Initialize project') init_parser.add_argument('--force', action='store_true') # 'run' command run_parser = subparsers.add_parser('run', help='Run the application') run_parser.add_argument('--port', type=int, default=8080) args = parser.parse_args() if args.command == 'init': do_init(args.force) elif args.command == 'run': do_run(args.port) Error Handling 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 import sys def main(): try: result = process() return 0 except 
FileNotFoundError as e: print(f"Error: File not found: {e.filename}", file=sys.stderr) return 1 except PermissionError: print("Error: Permission denied", file=sys.stderr) return 1 except KeyboardInterrupt: print("\nInterrupted", file=sys.stderr) return 130 except Exception as e: print(f"Error: {e}", file=sys.stderr) return 1 if __name__ == '__main__': sys.exit(main()) Logging 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 import logging def setup_logging(verbose=False): level = logging.DEBUG if verbose else logging.INFO logging.basicConfig( level=level, format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S' ) def main(): args = parse_args() setup_logging(args.verbose) logging.info("Starting process") logging.debug("Detailed info here") logging.warning("Something might be wrong") logging.error("Something went wrong") Log to File and Console 1 2 3 4 5 6 7 8 9 10 11 def setup_logging(verbose=False, log_file=None): handlers = [logging.StreamHandler()] if log_file: handlers.append(logging.FileHandler(log_file)) logging.basicConfig( level=logging.DEBUG if verbose else logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', handlers=handlers ) Progress Indicators Simple Progress 1 2 3 4 5 6 7 8 import sys def process_items(items): total = len(items) for i, item in enumerate(items, 1): process(item) print(f"\rProcessing: {i}/{total}", end='', flush=True) print() # Newline at end With tqdm 1 2 3 4 5 6 7 8 9 10 from tqdm import tqdm for item in tqdm(items, desc="Processing"): process(item) # Or wrap any iterable with tqdm(total=100) as pbar: for i in range(100): do_work() pbar.update(1) Reading Input From File or Stdin 1 2 3 4 5 6 7 8 9 10 import sys def read_input(filepath=None): if filepath: with open(filepath) as f: return f.read() elif not sys.stdin.isatty(): return sys.stdin.read() else: raise ValueError("No input provided") Line by Line 1 2 3 4 5 import fileinput # Reads from files in args or stdin for line in 
fileinput.input(): process(line.strip()) Output Formatting JSON Output 1 2 3 4 5 6 7 import json def output_json(data, pretty=False): if pretty: print(json.dumps(data, indent=2, default=str)) else: print(json.dumps(data, default=str)) Table Output 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 def print_table(headers, rows): # Calculate column widths widths = [len(h) for h in headers] for row in rows: for i, cell in enumerate(row): widths[i] = max(widths[i], len(str(cell))) # Print header header_line = ' | '.join(h.ljust(widths[i]) for i, h in enumerate(headers)) print(header_line) print('-' * len(header_line)) # Print rows for row in rows: print(' | '.join(str(cell).ljust(widths[i]) for i, cell in enumerate(row))) With tabulate 1 2 3 4 5 6 7 from tabulate import tabulate data = [ ['Alice', 30, 'Engineer'], ['Bob', 25, 'Designer'], ] print(tabulate(data, headers=['Name', 'Age', 'Role'], tablefmt='grid')) Configuration Files YAML Config 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 import yaml from pathlib import Path def load_config(config_path=None): paths = [ config_path, Path.home() / '.myapp.yaml', Path('/etc/myapp/config.yaml'), ] for path in paths: if path and Path(path).exists(): with open(path) as f: return yaml.safe_load(f) return {} # Defaults Environment Variables 1 2 3 4 5 6 7 8 import os def get_config(): return { 'api_key': os.environ.get('API_KEY'), 'debug': os.environ.get('DEBUG', '').lower() in ('true', '1', 'yes'), 'timeout': int(os.environ.get('TIMEOUT', '30')), } Running External Commands 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 import subprocess def run_command(cmd, check=True): """Run command and return output.""" result = subprocess.run( cmd, shell=isinstance(cmd, str), capture_output=True, text=True, check=check ) return result.stdout.strip() # Usage output = run_command(['git', 'status', '--short']) output = run_command('ls -la | head -5') With Timeout 1 2 3 4 5 6 7 8 9 try: result = subprocess.run( ['slow-command'], timeout=30, capture_output=True, text=True 
) except subprocess.TimeoutExpired: print("Command timed out") Temporary Files 1 2 3 4 5 6 7 8 9 10 11 12 13 import tempfile from pathlib import Path # Temporary file with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f: f.write('{"data": "value"}') temp_path = f.name # Temporary directory with tempfile.TemporaryDirectory() as tmpdir: work_file = Path(tmpdir) / 'work.txt' work_file.write_text('working...') # Directory deleted when context exits Path Handling 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 from pathlib import Path def process_files(directory): base = Path(directory) # Find files for path in base.glob('**/*.py'): print(f"Processing: {path}") # Path operations print(f" Name: {path.name}") print(f" Stem: {path.stem}") print(f" Suffix: {path.suffix}") print(f" Parent: {path.parent}") # Read/write content = path.read_text() path.with_suffix('.bak').write_text(content) Complete Example 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 #!/usr/bin/env python3 """Process log files and output statistics.""" import argparse import json import logging import sys from collections import Counter from pathlib import Path def setup_logging(verbose): logging.basicConfig( level=logging.DEBUG if verbose else logging.INFO, format='%(levelname)s: %(message)s' ) def parse_args(): parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter ) parser.add_argument( 'logfiles', nargs='+', type=Path, help='Log files to process' ) parser.add_argument( '-o', '--output', type=argparse.FileType('w'), default=sys.stdout, help='Output file (default: stdout)' ) parser.add_argument( '-f', '--format', choices=['json', 'text'], default='text', help='Output format' ) parser.add_argument( '-v', '--verbose', 
action='store_true', help='Enable verbose output' ) return parser.parse_args() def analyze_logs(logfiles): stats = Counter() for logfile in logfiles: logging.info(f"Processing {logfile}") if not logfile.exists(): logging.warning(f"File not found: {logfile}") continue for line in logfile.read_text().splitlines(): if 'ERROR' in line: stats['errors'] += 1 elif 'WARNING' in line: stats['warnings'] += 1 stats['total'] += 1 return dict(stats) def output_results(stats, output, fmt): if fmt == 'json': json.dump(stats, output, indent=2) output.write('\n') else: for key, value in stats.items(): output.write(f"{key}: {value}\n") def main(): args = parse_args() setup_logging(args.verbose) try: stats = analyze_logs(args.logfiles) output_results(stats, args.output, args.format) return 0 except Exception as e: logging.error(f"Failed: {e}") return 1 if __name__ == '__main__': sys.exit(main()) Usage: ...

February 28, 2026 · 6 min · 1202 words · Rob Washington

awk Patterns for Log Analysis and Text Processing

awk sits between grep and a full programming language. It’s perfect for columnar data, log files, and quick text transformations.

The Basic Pattern

    awk 'pattern { action }' file

If pattern matches, run action. No pattern means every line. No action means print.

    # Print everything
    awk '{ print }' file.txt

    # Print lines matching pattern
    awk '/error/' file.txt

    # Print second column
    awk '{ print $2 }' file.txt

    # Combined: errors, show timestamp and message
    awk '/error/ { print $1, $4 }' app.log

Field Handling

awk splits lines into fields by whitespace (default): ...

February 28, 2026 · 7 min · 1401 words · Rob Washington

jq Patterns for JSON Processing on the Command Line

JSON is everywhere. APIs return it, configs use it, logs contain it. jq is the Swiss Army knife for processing it all from the command line. Basic Selection 1 2 3 4 5 6 7 8 9 10 11 12 13 # Pretty print echo '{"name":"alice","age":30}' | jq . # Extract a field echo '{"name":"alice","age":30}' | jq '.name' # Output: "alice" # Raw output (no quotes) echo '{"name":"alice","age":30}' | jq -r '.name' # Output: alice # Nested fields echo '{"user":{"name":"alice"}}' | jq '.user.name' Working with Arrays 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 # Get all elements echo '[1,2,3]' | jq '.[]' # Output: # 1 # 2 # 3 # Get specific index echo '["a","b","c"]' | jq '.[1]' # Output: "b" # Slice echo '[1,2,3,4,5]' | jq '.[2:4]' # Output: [3,4] # First/last echo '[1,2,3]' | jq 'first' # 1 echo '[1,2,3]' | jq 'last' # 3 Filtering 1 2 3 4 5 6 7 8 9 10 11 12 13 # Select objects matching condition echo '[{"name":"alice","age":30},{"name":"bob","age":25}]' | \ jq '.[] | select(.age > 27)' # Output: {"name":"alice","age":30} # Multiple conditions jq '.[] | select(.status == "active" and .role == "admin")' # Contains jq '.[] | select(.tags | contains(["important"]))' # Regex matching jq '.[] | select(.email | test("@company\\.com$"))' Transforming Data 1 2 3 4 5 6 7 8 9 10 11 12 # Create new object echo '{"first":"Alice","last":"Smith"}' | \ jq '{fullName: (.first + " " + .last)}' # Output: {"fullName":"Alice Smith"} # Map over array echo '[1,2,3]' | jq 'map(. 
* 2)' # Output: [2,4,6] # Transform array of objects echo '[{"name":"alice"},{"name":"bob"}]' | \ jq 'map({user: .name, active: true})' API Response Processing 1 2 3 4 5 6 7 8 9 10 11 12 # Extract data from GitHub API curl -s https://api.github.com/users/torvalds/repos | \ jq '.[] | {name, stars: .stargazers_count, language}' | \ jq -s 'sort_by(.stars) | reverse | .[0:5]' # Get just names curl -s https://api.github.com/users/torvalds/repos | \ jq -r '.[].name' # Count items curl -s https://api.github.com/users/torvalds/repos | \ jq 'length' Aggregation 1 2 3 4 5 6 7 8 9 10 11 12 13 14 # Sum echo '[{"value":10},{"value":20},{"value":30}]' | \ jq '[.[].value] | add' # Output: 60 # Average jq '[.[].value] | add / length' # Group by echo '[{"type":"a","n":1},{"type":"b","n":2},{"type":"a","n":3}]' | \ jq 'group_by(.type) | map({type: .[0].type, total: [.[].n] | add})' # Count by field jq 'group_by(.status) | map({status: .[0].status, count: length})' Building Output 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 # Concatenate to string echo '{"host":"db","port":5432}' | \ jq -r '"\(.host):\(.port)"' # Output: db:5432 # Create CSV echo '[{"name":"alice","age":30},{"name":"bob","age":25}]' | \ jq -r '.[] | [.name, .age] | @csv' # Output: # "alice",30 # "bob",25 # Create TSV jq -r '.[] | [.name, .age] | @tsv' # URI encode jq -r '@uri' # Base64 jq -r '@base64' Conditional Logic 1 2 3 4 5 6 7 8 9 10 11 # If/then/else echo '{"score":85}' | \ jq 'if .score >= 90 then "A" elif .score >= 80 then "B" else "C" end' # Alternative operator (default values) echo '{"name":"alice"}' | jq '.age // 0' # Output: 0 # Try (suppress errors) echo '{"a":1}' | jq '.b.c.d // "missing"' # Output: "missing" Modifying JSON 1 2 3 4 5 6 7 8 9 10 11 12 13 14 # Update field echo '{"name":"alice","age":30}' | jq '.age = 31' # Add field echo '{"name":"alice"}' | jq '. 
+ {active: true}' # Delete field echo '{"name":"alice","temp":123}' | jq 'del(.temp)' # Update nested echo '{"user":{"name":"alice"}}' | jq '.user.name = "bob"' # Recursive update jq '.. | objects | .timestamp |= (. // now)' Multiple Files 1 2 3 4 5 6 7 8 9 # Combine objects from files jq -s '.[0] * .[1]' defaults.json overrides.json # Process files independently jq -r '.name' file1.json file2.json # Slurp into array jq -s '.' file1.json file2.json # Output: [{...}, {...}] Stream Processing For large files, use streaming: ...

February 28, 2026 · 5 min · 1029 words · Rob Washington

htop: Process Monitoring for Humans

top works. htop works better. It’s colorful, interactive, and actually pleasant to use. Here’s how to get the most from it.

Installation

    # Debian/Ubuntu
    sudo apt install htop

    # RHEL/CentOS/Fedora
    sudo dnf install htop

    # macOS
    brew install htop

The Interface

Launch with htop. You’ll see:

Top section:

- CPU bars (one per core)
- Memory and swap usage
- Tasks, load average, uptime

Process list: ...

February 27, 2026 · 4 min · 664 words · Rob Washington

grep: Pattern Matching That Actually Works

You know grep "error" logfile.txt. But grep can do so much more — recursive searches, context lines, inverse matching, and regex patterns that turn hours of manual searching into seconds.

The Basics

    # Search for pattern in file
    grep "error" app.log

    # Case-insensitive
    grep -i "error" app.log

    # Show line numbers
    grep -n "error" app.log

    # Count matches
    grep -c "error" app.log

    # Only show filenames with matches
    grep -l "error" *.log

Recursive Search

    # Search all files in directory tree
    grep -r "TODO" ./src

    # With line numbers
    grep -rn "TODO" ./src

    # Include only certain files
    grep -r --include="*.py" "import os" .

    # Exclude directories
    grep -r --exclude-dir=node_modules "console.log" .

    # Multiple excludes
    grep -r --exclude-dir={node_modules,.git,dist} "function" .

Context Lines

When you find a match, you often need surrounding context: ...

February 27, 2026 · 6 min · 1087 words · Rob Washington

sed: Edit Files Without Opening Them

You need to change a config value across 50 files. You could open each one, or:

    sed -i 's/old_value/new_value/g' *.conf

Done. sed is the stream editor — it transforms text as it flows through. Master it, and you’ll never manually edit repetitive files again.

The Basics

    # Replace first occurrence per line
    echo "hello hello" | sed 's/hello/hi/'
    # hi hello

    # Replace all occurrences (g = global)
    echo "hello hello" | sed 's/hello/hi/g'
    # hi hi

    # Replace in file (print to stdout)
    sed 's/foo/bar/g' file.txt

    # Replace in place (-i)
    sed -i 's/foo/bar/g' file.txt

    # Backup before in-place edit
    sed -i.bak 's/foo/bar/g' file.txt

The -i flag is powerful and dangerous. Always test without it first. ...

February 27, 2026 · 5 min · 911 words · Rob Washington

awk: When grep and cut Aren't Enough

You can grep for lines and cut for columns. But what about “show me the third column of lines containing ERROR, but only if the second column is greater than 100”? That’s awk territory.

The Basics

awk processes text line by line, splitting each into fields:

    # Print second column (space-delimited by default)
    echo "hello world" | awk '{print $2}'
    # world

    # Print first and third columns
    cat data.txt | awk '{print $1, $3}'

    # Print entire line
    awk '{print $0}' file.txt

$1, $2, etc. are fields. $0 is the whole line. NF is the number of fields. NR is the line number. ...

February 27, 2026 · 6 min · 1125 words · Rob Washington

find: The Swiss Army Knife You're Underusing

Every developer knows find . -name "*.txt". Few know that find can replace half your shell scripts.

Beyond Basic Search

    # Find by name (case-insensitive)
    find . -iname "readme*"

    # Find by extension
    find . -name "*.py"

    # Find by exact name
    find . -name "Makefile"

    # Find excluding directories
    find . -name "*.js" -not -path "./node_modules/*"

The -not (or !) operator is your friend for excluding noise. ...

February 27, 2026 · 6 min · 1166 words · Rob Washington

xargs: Turn Any Output Into Parallel Commands

You have a list of files. You need to process each one. The naive approach:

    for file in $(cat files.txt); do
      process "$file"
    done

This works until it doesn’t — filenames with spaces break it, and it’s sequential. Enter xargs.

The Basics

xargs reads input and converts it into arguments for a command:

    # Delete files listed in a file
    cat files.txt | xargs rm

    # Same thing, more efficient
    xargs rm < files.txt

Without xargs, you’d need a loop. With xargs, one line. ...

February 27, 2026 · 5 min · 1033 words · Rob Washington

AWS CLI Power User: Queries, Filters, and Automation

The AWS Console is fine for exploration. For real work—auditing, automation, bulk operations—the CLI is essential. Here’s how to use it effectively.

Output Formats

    # JSON (default, best for scripting)
    aws ec2 describe-instances --output json

    # Table (human readable)
    aws ec2 describe-instances --output table

    # Text (tab-separated, grep-friendly)
    aws ec2 describe-instances --output text

    # YAML
    aws ec2 describe-instances --output yaml

Set default in ~/.aws/config: ...

February 26, 2026 · 6 min · 1170 words · Rob Washington