Centralize date/time formatting utilities (#11831)

max
2026-02-08 04:53:31 -08:00
committed by GitHub
parent 74fbbda283
commit a1123dd9be
77 changed files with 1508 additions and 1075 deletions


@@ -0,0 +1,312 @@
#!/usr/bin/env python3
"""
Lists the longest and shortest code files in the project, prints per-package
totals, and flags function names that appear in more than one file.
Thresholds can be set to warn about files longer or shorter than a given
number of lines.
"""
import os
import re
import argparse
from pathlib import Path
from typing import List, Tuple, Dict, Set
from collections import defaultdict
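
# Example invocations (a sketch; the flags match the argparse options defined
# in main() below, and the script filename is hypothetical):
#   python3 list_file_lengths.py                      # scan the current directory
#   python3 list_file_lengths.py -d src -t 800 -n 30  # custom root, threshold, top count
#   python3 list_file_lengths.py --min-threshold 5    # relax the short-file warning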

# File extensions to consider as code files
CODE_EXTENSIONS = {
'.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs', # TypeScript/JavaScript
'.swift', # macOS/iOS
'.kt', '.java', # Android
'.py', '.sh', # Scripts
}

# Directories to skip
SKIP_DIRS = {
'node_modules', '.git', 'dist', 'build', 'coverage',
'__pycache__', '.turbo', 'out', '.worktrees', 'vendor',
'Pods', 'DerivedData', '.gradle', '.idea'
}

# Filename patterns to skip in short-file warnings (barrel exports, stubs)
SKIP_SHORT_PATTERNS = {
'index.js', 'index.ts', 'postinstall.js',
}
SKIP_SHORT_SUFFIXES = ('-cli.ts',)

# Function names to skip in duplicate detection (common utilities, test helpers)
SKIP_DUPLICATE_FUNCTIONS = {
# Common utility names
'main', 'init', 'setup', 'teardown', 'cleanup', 'dispose', 'destroy',
'open', 'close', 'connect', 'disconnect', 'execute', 'run', 'start', 'stop',
'render', 'update', 'refresh', 'reset', 'clear', 'flush',
}
SKIP_DUPLICATE_PREFIXES = (
# Transformers
'normalize', 'parse', 'validate', 'serialize', 'deserialize',
'convert', 'transform', 'extract', 'encode', 'decode',
# Predicates
'is', 'has', 'can', 'should', 'will',
# Constructors/factories
'create', 'make', 'build', 'generate', 'new',
# Accessors
'get', 'set', 'read', 'write', 'load', 'save', 'fetch',
# Handlers
'handle', 'on', 'emit',
# Modifiers
'add', 'remove', 'delete', 'update', 'insert', 'append',
# Other common
'to', 'from', 'with', 'apply', 'process', 'resolve', 'ensure', 'check',
'filter', 'map', 'reduce', 'merge', 'split', 'join', 'find', 'search',
'register', 'unregister', 'subscribe', 'unsubscribe',
)
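# For example, hypothetical names like 'parseTimestamp' or 'isVisible' are
# skipped because they begin with the 'parse' / 'is' prefixes above.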
SKIP_DUPLICATE_FILE_PATTERNS = ('.test.ts', '.test.tsx', '.spec.ts')

# Known packages in the monorepo
PACKAGES = {
'src', 'apps', 'extensions', 'packages', 'scripts', 'ui', 'test', 'docs'
}

def get_package(file_path: Path, root_dir: Path) -> str:
"""Get the package name for a file, or 'root' if at top level."""
try:
relative = file_path.relative_to(root_dir)
parts = relative.parts
if len(parts) > 0 and parts[0] in PACKAGES:
return parts[0]
return 'root'
except ValueError:
return 'root'

def count_lines(file_path: Path) -> int:
"""Count the number of lines in a file."""
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
return sum(1 for _ in f)
except Exception:
return 0

def find_code_files(root_dir: Path) -> List[Tuple[Path, int]]:
"""Find all code files and their line counts."""
files_with_counts = []
for dirpath, dirnames, filenames in os.walk(root_dir):
# Remove skip directories from dirnames to prevent walking into them
dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS]
for filename in filenames:
file_path = Path(dirpath) / filename
if file_path.suffix.lower() in CODE_EXTENSIONS:
line_count = count_lines(file_path)
files_with_counts.append((file_path, line_count))
return files_with_counts
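
# Note: os.walk() does not follow directory symlinks by default, so symlinked
# trees are skipped along with SKIP_DIRS.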

# Regex patterns for TypeScript functions (exported and internal)
TS_FUNCTION_PATTERNS = [
# export function name(...) or function name(...)
re.compile(r'^(?:export\s+)?(?:async\s+)?function\s+(\w+)', re.MULTILINE),
    # export const name = (...) => or const name = async x => (arrow functions)
    re.compile(r'^(?:export\s+)?const\s+(\w+)\s*=\s*(?:async\s+)?(?:\([^)]*\)|\w+)\s*=>', re.MULTILINE),
]
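# Illustrative matches (hypothetical identifiers):
#   "export async function formatDate(d) {"       -> formatDate
#   "export const formatTime = (d: Date) => ..."  -> formatTime
#   "const toLabel = async x => String(x)"        -> toLabel
# Indented class methods and object-literal properties are not matched,
# since both patterns anchor at the start of a line.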

def extract_functions(file_path: Path) -> Set[str]:
"""Extract function names from a TypeScript file."""
if file_path.suffix.lower() not in {'.ts', '.tsx'}:
return set()
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
except Exception:
return set()
functions = set()
for pattern in TS_FUNCTION_PATTERNS:
for match in pattern.finditer(content):
functions.add(match.group(1))
return functions

def find_duplicate_functions(files: List[Tuple[Path, int]]) -> Dict[str, List[Path]]:
"""Find function names that appear in multiple files."""
function_locations: Dict[str, List[Path]] = defaultdict(list)
for file_path, _ in files:
# Skip test files for duplicate detection
        if file_path.name.endswith(SKIP_DUPLICATE_FILE_PATTERNS):
continue
functions = extract_functions(file_path)
for func in functions:
# Skip known common function names
if func in SKIP_DUPLICATE_FUNCTIONS:
continue
if any(func.startswith(prefix) for prefix in SKIP_DUPLICATE_PREFIXES):
continue
function_locations[func].append(file_path)
# Filter to only duplicates
return {name: paths for name, paths in function_locations.items() if len(paths) > 1}
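# Shape of the result (hypothetical paths):
#   {'formatDuration': [Path('src/utils/time.ts'), Path('ui/clock.ts')]}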

def main():
parser = argparse.ArgumentParser(
description='List the longest and shortest code files in a project'
)
parser.add_argument(
'-t', '--threshold',
type=int,
default=1000,
        help='Warn about files with at least this many lines (default: 1000)'
)
parser.add_argument(
'--min-threshold',
type=int,
default=10,
        help='Warn about files with this many lines or fewer (default: 10)'
)
parser.add_argument(
'-n', '--top',
type=int,
default=20,
help='Show top N longest files (default: 20)'
)
parser.add_argument(
'-b', '--bottom',
type=int,
default=10,
help='Show bottom N shortest files (default: 10)'
)
parser.add_argument(
'-d', '--directory',
type=str,
default='.',
help='Directory to scan (default: current directory)'
)
args = parser.parse_args()
root_dir = Path(args.directory).resolve()
print(f"\n📂 Scanning: {root_dir}\n")

    # Find and sort files by line count
files = find_code_files(root_dir)
files_desc = sorted(files, key=lambda x: x[1], reverse=True)
files_asc = sorted(files, key=lambda x: x[1])

    # Show top N longest files
top_files = files_desc[:args.top]
    print(f"📊 Top {len(top_files)} longest code files:\n")
print(f"{'Lines':>8} {'File'}")
print("-" * 60)
long_warnings = []
for file_path, line_count in top_files:
relative_path = file_path.relative_to(root_dir)
# Check if over threshold
if line_count >= args.threshold:
marker = " ⚠️"
long_warnings.append((relative_path, line_count))
else:
marker = ""
print(f"{line_count:>8} {relative_path}{marker}")

    # Show bottom N shortest files
bottom_files = files_asc[:args.bottom]
    print(f"\n📉 Bottom {len(bottom_files)} shortest code files:\n")
print(f"{'Lines':>8} {'File'}")
print("-" * 60)
short_warnings = []
for file_path, line_count in bottom_files:
relative_path = file_path.relative_to(root_dir)
filename = file_path.name
# Skip known barrel exports and stubs
        is_expected_short = (
            filename in SKIP_SHORT_PATTERNS or
            filename.endswith(SKIP_SHORT_SUFFIXES)
        )
# Check if under threshold
if line_count <= args.min_threshold and not is_expected_short:
marker = " ⚠️"
short_warnings.append((relative_path, line_count))
else:
marker = ""
print(f"{line_count:>8} {relative_path}{marker}")

    # Summary
total_files = len(files)
total_lines = sum(count for _, count in files)
print("-" * 60)
    print("\n📈 Summary:")
print(f" Total code files: {total_files:,}")
print(f" Total lines: {total_lines:,}")
print(f" Average lines/file: {total_lines // total_files if total_files else 0:,}")

    # Per-package breakdown
    package_stats: Dict[str, Dict[str, int]] = {}
for file_path, line_count in files:
pkg = get_package(file_path, root_dir)
if pkg not in package_stats:
package_stats[pkg] = {'files': 0, 'lines': 0}
package_stats[pkg]['files'] += 1
package_stats[pkg]['lines'] += line_count
    print("\n📦 Per-package breakdown:\n")
print(f"{'Package':<15} {'Files':>8} {'Lines':>10} {'Avg':>8}")
print("-" * 45)
for pkg in sorted(package_stats.keys(), key=lambda p: package_stats[p]['lines'], reverse=True):
stats = package_stats[pkg]
avg = stats['lines'] // stats['files'] if stats['files'] else 0
print(f"{pkg:<15} {stats['files']:>8,} {stats['lines']:>10,} {avg:>8,}")

    # Long file warnings
if long_warnings:
        print(f"\n⚠️ Warning: {len(long_warnings)} file(s) have {args.threshold} or more lines (consider refactoring):")
for path, count in long_warnings:
print(f" - {path} ({count:,} lines)")
else:
        print(f"\n✅ No files have {args.threshold} or more lines")

    # Short file warnings
if short_warnings:
        print(f"\n⚠️ Warning: {len(short_warnings)} file(s) have {args.min_threshold} lines or fewer (check whether they are needed):")
for path, count in short_warnings:
print(f" - {path} ({count} lines)")
else:
        print(f"\n✅ No files have {args.min_threshold} lines or fewer")

    # Duplicate function names
    duplicates = find_duplicate_functions(files)
if duplicates:
print(f"\n⚠️ Warning: {len(duplicates)} function name(s) appear in multiple files (consider renaming):")
for func_name in sorted(duplicates.keys()):
paths = duplicates[func_name]
print(f" - {func_name}:")
for path in paths:
print(f" {path.relative_to(root_dir)}")
else:
        print("\n✅ No duplicate function names")
print()

if __name__ == '__main__':
main()
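
# Note: the warnings are informational only; the script always exits 0. To gate
# CI on these checks, one could exit non-zero when long_warnings,
# short_warnings, or duplicates are non-empty (not implemented here).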