mirror of
https://github.com/openclaw/openclaw.git
synced 2026-02-09 05:19:32 +08:00
313 lines
10 KiB
Python
313 lines
10 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Lists the longest and shortest code files in the project.
|
|
Threshold can be set to warn about files longer or shorter than a certain number of lines.
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import argparse
|
|
from pathlib import Path
|
|
from typing import List, Tuple, Dict, Set
|
|
from collections import defaultdict
|
|
|
|
# Suffixes (leading dot included) that identify a file as source code.
# find_code_files() lowercases a file's suffix before looking it up here.
CODE_EXTENSIONS = {
    # TypeScript/JavaScript
    '.ts',
    '.tsx',
    '.js',
    '.jsx',
    '.mjs',
    '.cjs',
    # macOS/iOS
    '.swift',
    # Android
    '.kt',
    '.java',
    # Scripts
    '.py',
    '.sh',
}

# Directory names that are pruned from the walk at every depth.
SKIP_DIRS = {
    'node_modules',
    '.git',
    'dist',
    'build',
    'coverage',
    '__pycache__',
    '.turbo',
    'out',
    '.worktrees',
    'vendor',
    'Pods',
    'DerivedData',
    '.gradle',
    '.idea',
}
|
|
|
|
# Exact filenames that never trigger a short-file warning
# (barrel exports, stubs).
SKIP_SHORT_PATTERNS = {
    'index.js',
    'index.ts',
    'postinstall.js',
}

# Filename endings that are likewise exempt from short-file warnings.
SKIP_SHORT_SUFFIXES = (
    '-cli.ts',
)
|
|
|
|
# Exact function names ignored by duplicate detection
# (common utilities, test helpers).
SKIP_DUPLICATE_FUNCTIONS = {
    # Common utility names
    'main',
    'init',
    'setup',
    'teardown',
    'cleanup',
    'dispose',
    'destroy',
    'open',
    'close',
    'connect',
    'disconnect',
    'execute',
    'run',
    'start',
    'stop',
    'render',
    'update',
    'refresh',
    'reset',
    'clear',
    'flush',
}

# Function names that merely START WITH one of these strings are ignored as
# well; the match is a plain string prefix test, not a word-boundary test.
SKIP_DUPLICATE_PREFIXES = (
    # Transformers
    'normalize',
    'parse',
    'validate',
    'serialize',
    'deserialize',
    'convert',
    'transform',
    'extract',
    'encode',
    'decode',
    # Predicates
    'is',
    'has',
    'can',
    'should',
    'will',
    # Constructors/factories
    'create',
    'make',
    'build',
    'generate',
    'new',
    # Accessors
    'get',
    'set',
    'read',
    'write',
    'load',
    'save',
    'fetch',
    # Handlers
    'handle',
    'on',
    'emit',
    # Modifiers
    'add',
    'remove',
    'delete',
    'update',
    'insert',
    'append',
    # Other common
    'to',
    'from',
    'with',
    'apply',
    'process',
    'resolve',
    'ensure',
    'check',
    'filter',
    'map',
    'reduce',
    'merge',
    'split',
    'join',
    'find',
    'search',
    'register',
    'unregister',
    'subscribe',
    'unsubscribe',
)

# Files whose name ends with one of these are excluded from duplicate
# detection altogether (test and spec files).
SKIP_DUPLICATE_FILE_PATTERNS = (
    '.test.ts',
    '.test.tsx',
    '.spec.ts',
)
|
|
|
|
# Top-level directory names treated as packages of the monorepo; files
# outside all of them are reported under 'root'.
PACKAGES = {
    'src',
    'apps',
    'extensions',
    'packages',
    'scripts',
    'ui',
    'test',
    'docs',
}
|
|
|
|
|
|
def get_package(file_path: Path, root_dir: Path) -> str:
    """Return the package a file belongs to.

    The package is the first path component of ``file_path`` relative to
    ``root_dir`` when that component is listed in ``PACKAGES``. In every
    other case -- the component is not a known package, the relative path
    is empty, or ``file_path`` does not live under ``root_dir`` -- the
    result is ``'root'``.
    """
    try:
        components = file_path.relative_to(root_dir).parts
    except ValueError:
        # relative_to() raises when file_path is not below root_dir.
        return 'root'

    top_level = components[0] if components else None
    return top_level if top_level in PACKAGES else 'root'
|
|
|
|
|
|
def count_lines(file_path: Path) -> int:
|
|
"""Count the number of lines in a file."""
|
|
try:
|
|
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
|
return sum(1 for _ in f)
|
|
except Exception:
|
|
return 0
|
|
|
|
|
|
def find_code_files(root_dir: Path) -> List[Tuple[Path, int]]:
    """Walk ``root_dir`` and return ``(path, line_count)`` for each code file.

    A file counts as code when its lowercased suffix is in
    ``CODE_EXTENSIONS``. Directories named in ``SKIP_DIRS`` are never
    entered. Results appear in the order ``os.walk`` produces them.
    """
    results: List[Tuple[Path, int]] = []

    for current_dir, subdirs, names in os.walk(root_dir):
        # In-place slice assignment is what tells os.walk to prune the tree.
        subdirs[:] = [name for name in subdirs if name not in SKIP_DIRS]

        base = Path(current_dir)
        candidates = (base / name for name in names)
        results.extend(
            (path, count_lines(path))
            for path in candidates
            if path.suffix.lower() in CODE_EXTENSIONS
        )

    return results
|
|
|
|
|
|
# Compiled regexes that recognise TypeScript function declarations, exported
# or internal. Each one captures the function name in group 1 and, because of
# re.MULTILINE, only matches declarations that begin at the start of a line.
TS_FUNCTION_PATTERNS = [
    re.compile(expression, re.MULTILINE)
    for expression in (
        # export function name(...) or function name(...)
        r'^(?:export\s+)?(?:async\s+)?function\s+(\w+)',
        # export const name = or const name =
        r'^(?:export\s+)?const\s+(\w+)\s*=\s*(?:\([^)]*\)|\w+)\s*=>',
    )
]
|
|
|
|
|
|
def extract_functions(file_path: Path) -> Set[str]:
    """Collect the function names declared in a TypeScript source file.

    Only files whose lowercased suffix is ``.ts`` or ``.tsx`` are
    inspected; any other file, or one that cannot be read, gives an empty
    set. A name is whatever group 1 of a ``TS_FUNCTION_PATTERNS`` regex
    captures in the file's text (decoded as UTF-8, bad bytes dropped).
    """
    if file_path.suffix.lower() not in ('.ts', '.tsx'):
        return set()

    try:
        source_text = file_path.read_text(encoding='utf-8', errors='ignore')
    except Exception:
        return set()

    return {
        hit.group(1)
        for regex in TS_FUNCTION_PATTERNS
        for hit in regex.finditer(source_text)
    }
|
|
|
|
|
|
def find_duplicate_functions(files: List[Tuple[Path, int]], root_dir: Path) -> Dict[str, List[Path]]:
    """Map each function name declared in more than one file to those files.

    ``files`` is a list of ``(path, line_count)`` pairs; the line count is
    ignored. Files whose name ends with a ``SKIP_DUPLICATE_FILE_PATTERNS``
    entry are not scanned, and names that are in
    ``SKIP_DUPLICATE_FUNCTIONS`` or start with a ``SKIP_DUPLICATE_PREFIXES``
    entry are not recorded. ``root_dir`` is accepted but not used here.
    """
    seen_in: Dict[str, List[Path]] = defaultdict(list)

    for source_path, _line_count in files:
        # Test/spec files routinely repeat helper names; leave them out.
        if source_path.name.endswith(SKIP_DUPLICATE_FILE_PATTERNS):
            continue

        for name in extract_functions(source_path):
            is_common = (
                name in SKIP_DUPLICATE_FUNCTIONS
                or name.startswith(SKIP_DUPLICATE_PREFIXES)
            )
            if not is_common:
                seen_in[name].append(source_path)

    # Keep only the names found in at least two files.
    return {name: paths for name, paths in seen_in.items() if len(paths) > 1}
|
|
|
|
|
|
def _is_expected_short(filename: str) -> bool:
    """Return True when ``filename`` is exempt from short-file warnings.

    A file is exempt when its name is listed in ``SKIP_SHORT_PATTERNS`` or
    ends with one of ``SKIP_SHORT_SUFFIXES``.
    """
    return (
        filename in SKIP_SHORT_PATTERNS
        or any(filename.endswith(suffix) for suffix in SKIP_SHORT_SUFFIXES)
    )


def _print_file_table(entries, root_dir, needs_warning) -> None:
    """Print a ``Lines / File`` table for ``(path, line_count)`` entries.

    Paths are shown relative to ``root_dir``; rows for which
    ``needs_warning(path, line_count)`` is true get a warning marker.
    """
    print(f"{'Lines':>8} {'File'}")
    print("-" * 60)
    for file_path, line_count in entries:
        marker = " ⚠️" if needs_warning(file_path, line_count) else ""
        print(f"{line_count:>8} {file_path.relative_to(root_dir)}{marker}")


def main():
    """Command-line entry point: scan a directory and print the size report.

    The report consists of the N longest and N shortest code files, overall
    totals, a per-package breakdown, warnings for files at or above
    ``--threshold`` lines or at or below ``--min-threshold`` lines, and
    function names that appear in more than one TypeScript file.

    Warnings are computed over every scanned file, not only over the rows
    shown in the top/bottom tables, so the warning counts and the
    "No files ..." messages stay correct whatever ``--top`` and ``--bottom``
    are set to. Exits through ``parser.error`` when the target is not a
    directory.
    """
    parser = argparse.ArgumentParser(
        description='List the longest and shortest code files in a project'
    )
    parser.add_argument(
        '-t', '--threshold',
        type=int,
        default=1000,
        help='Warn about files longer than this many lines (default: 1000)'
    )
    parser.add_argument(
        '--min-threshold',
        type=int,
        default=10,
        help='Warn about files shorter than this many lines (default: 10)'
    )
    parser.add_argument(
        '-n', '--top',
        type=int,
        default=20,
        help='Show top N longest files (default: 20)'
    )
    parser.add_argument(
        '-b', '--bottom',
        type=int,
        default=10,
        help='Show bottom N shortest files (default: 10)'
    )
    parser.add_argument(
        '-d', '--directory',
        type=str,
        default='.',
        help='Directory to scan (default: current directory)'
    )

    args = parser.parse_args()

    root_dir = Path(args.directory).resolve()
    if not root_dir.is_dir():
        # os.walk() yields nothing for a bad path, which would otherwise
        # produce a misleading all-clear report.
        parser.error(f"not a directory: {root_dir}")

    print(f"\n📂 Scanning: {root_dir}\n")

    # Find and sort files by line count
    files = find_code_files(root_dir)
    files_desc = sorted(files, key=lambda entry: entry[1], reverse=True)
    files_asc = sorted(files, key=lambda entry: entry[1])

    def is_too_long(_file_path, line_count):
        # Inclusive comparison, as in the original report.
        return line_count >= args.threshold

    def is_too_short(file_path, line_count):
        return (
            line_count <= args.min_threshold
            and not _is_expected_short(file_path.name)
        )

    # Show top N longest files (a negative N is treated as 0 rather than
    # slicing from the wrong end of the list).
    top_files = files_desc[:max(args.top, 0)]
    print(f"📊 Top {len(top_files)} longest code files:\n")
    _print_file_table(top_files, root_dir, is_too_long)

    # Show bottom N shortest files
    bottom_files = files_asc[:max(args.bottom, 0)]
    print(f"\n📉 Bottom {len(bottom_files)} shortest code files:\n")
    _print_file_table(bottom_files, root_dir, is_too_short)

    # Warnings cover ALL files so they do not depend on the table sizes.
    long_warnings = [
        (file_path.relative_to(root_dir), line_count)
        for file_path, line_count in files_desc
        if is_too_long(file_path, line_count)
    ]
    short_warnings = [
        (file_path.relative_to(root_dir), line_count)
        for file_path, line_count in files_asc
        if is_too_short(file_path, line_count)
    ]

    # Summary
    total_files = len(files)
    total_lines = sum(count for _, count in files)

    print("-" * 60)
    print("\n📈 Summary:")
    print(f" Total code files: {total_files:,}")
    print(f" Total lines: {total_lines:,}")
    print(f" Average lines/file: {total_lines // total_files if total_files else 0:,}")

    # Per-package breakdown
    package_stats: Dict[str, Dict[str, int]] = {}
    for file_path, line_count in files:
        stats = package_stats.setdefault(
            get_package(file_path, root_dir), {'files': 0, 'lines': 0}
        )
        stats['files'] += 1
        stats['lines'] += line_count

    print("\n📦 Per-package breakdown:\n")
    print(f"{'Package':<15} {'Files':>8} {'Lines':>10} {'Avg':>8}")
    print("-" * 45)

    # Largest package (by total lines) first.
    for pkg in sorted(package_stats, key=lambda p: package_stats[p]['lines'], reverse=True):
        stats = package_stats[pkg]
        avg = stats['lines'] // stats['files'] if stats['files'] else 0
        print(f"{pkg:<15} {stats['files']:>8,} {stats['lines']:>10,} {avg:>8,}")

    # Long file warnings
    if long_warnings:
        print(f"\n⚠️ Warning: {len(long_warnings)} file(s) exceed {args.threshold} lines (consider refactoring):")
        for path, count in long_warnings:
            print(f" - {path} ({count:,} lines)")
    else:
        print(f"\n✅ No files exceed {args.threshold} lines")

    # Short file warnings
    if short_warnings:
        print(f"\n⚠️ Warning: {len(short_warnings)} file(s) are {args.min_threshold} lines or less (check if needed):")
        for path, count in short_warnings:
            print(f" - {path} ({count} lines)")
    else:
        print(f"\n✅ No files are {args.min_threshold} lines or less")

    # Duplicate function names
    duplicates = find_duplicate_functions(files, root_dir)
    if duplicates:
        print(f"\n⚠️ Warning: {len(duplicates)} function name(s) appear in multiple files (consider renaming):")
        for func_name in sorted(duplicates):
            print(f" - {func_name}:")
            for path in duplicates[func_name]:
                print(f" {path.relative_to(root_dir)}")
    else:
        print("\n✅ No duplicate function names")

    print()


if __name__ == '__main__':
    main()
|