openclaw/scripts/docs-i18n/main.go

package main

import (
	"context"
	"flag"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"sync"
	"time"
)

// docJob is a single file queued for translation in parallel doc mode.
type docJob struct {
	index int
	path  string
	rel   string
}

// docResult reports the outcome of one translated (or skipped) file.
type docResult struct {
	index    int
	rel      string
	duration time.Duration
	skipped  bool
	err      error
}

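// Example invocation, run from the repository root (a sketch: the file
// arguments are placeholders, only the flags and their defaults come from the
// definitions below):
//
//	go run ./scripts/docs-i18n -lang zh-CN -mode doc -parallel 4 docs/example.md
//
// Positional arguments are the source documents to translate. With no flags,
// the tool translates from en to zh-CN in segment mode with a single worker.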
func main() {
	var (
		targetLang = flag.String("lang", "zh-CN", "target language (e.g., zh-CN)")
		sourceLang = flag.String("src", "en", "source language")
		docsRoot   = flag.String("docs", "docs", "docs root")
		tmPath     = flag.String("tm", "", "translation memory path")
		mode       = flag.String("mode", "segment", "translation mode (segment|doc)")
		thinking   = flag.String("thinking", "high", "thinking level (low|high)")
		overwrite  = flag.Bool("overwrite", false, "overwrite existing translations")
		maxFiles   = flag.Int("max", 0, "max files to process (0 = all)")
		parallel   = flag.Int("parallel", 1, "parallel workers for doc mode")
	)
	flag.Parse()

	files := flag.Args()
	if len(files) == 0 {
		fatal(fmt.Errorf("no doc files provided"))
	}

	resolvedDocsRoot, err := filepath.Abs(*docsRoot)
	if err != nil {
		fatal(err)
	}
	// Translation memory and glossary live under <docs>/.i18n by default.
	if *tmPath == "" {
		*tmPath = filepath.Join(resolvedDocsRoot, ".i18n", fmt.Sprintf("%s.tm.jsonl", *targetLang))
	}
	glossaryPath := filepath.Join(resolvedDocsRoot, ".i18n", fmt.Sprintf("glossary.%s.json", *targetLang))
	glossary, err := LoadGlossary(glossaryPath)
	if err != nil {
		fatal(err)
	}

	translator, err := NewPiTranslator(*sourceLang, *targetLang, glossary, *thinking)
	if err != nil {
		fatal(err)
	}
	defer translator.Close()

	tm, err := LoadTranslationMemory(*tmPath)
	if err != nil {
		fatal(err)
	}

	ordered, err := orderFiles(resolvedDocsRoot, files)
	if err != nil {
		fatal(err)
	}

	// In doc mode without -overwrite, drop files whose translation is already
	// up to date so the progress counters reflect only pending work.
	totalFiles := len(ordered)
	preSkipped := 0
	if *mode == "doc" && !*overwrite {
		filtered, skipped, err := filterDocQueue(resolvedDocsRoot, *targetLang, ordered)
		if err != nil {
			fatal(err)
		}
		ordered = filtered
		preSkipped = skipped
	}
	if *maxFiles > 0 && *maxFiles < len(ordered) {
		ordered = ordered[:*maxFiles]
	}

	log.SetFlags(log.LstdFlags)
	start := time.Now()
	processed := 0
	skipped := 0
	if *parallel < 1 {
		*parallel = 1
	}
	log.Printf("docs-i18n: mode=%s total=%d pending=%d pre_skipped=%d overwrite=%t thinking=%s parallel=%d", *mode, totalFiles, len(ordered), preSkipped, *overwrite, *thinking, *parallel)

	switch *mode {
	case "doc":
		if *parallel > 1 {
			proc, skip, err := runDocParallel(context.Background(), ordered, resolvedDocsRoot, *sourceLang, *targetLang, *overwrite, *parallel, glossary, *thinking)
			if err != nil {
				fatal(err)
			}
			processed += proc
			skipped += skip
		} else {
			proc, skip, err := runDocSequential(context.Background(), ordered, translator, resolvedDocsRoot, *sourceLang, *targetLang, *overwrite)
			if err != nil {
				fatal(err)
			}
			processed += proc
			skipped += skip
		}
	case "segment":
		if *parallel > 1 {
			fatal(fmt.Errorf("parallel processing is only supported in doc mode"))
		}
		proc, err := runSegmentSequential(context.Background(), ordered, translator, tm, resolvedDocsRoot, *sourceLang, *targetLang)
		if err != nil {
			fatal(err)
		}
		processed += proc
	default:
		fatal(fmt.Errorf("unknown mode: %s", *mode))
	}

	if err := tm.Save(); err != nil {
		fatal(err)
	}
	elapsed := time.Since(start).Round(time.Millisecond)
	log.Printf("docs-i18n: completed processed=%d skipped=%d elapsed=%s", processed, skipped, elapsed)
}

// runDocSequential translates the ordered files one at a time with the shared
// translator, returning the number of processed and skipped files.
func runDocSequential(ctx context.Context, ordered []string, translator *PiTranslator, docsRoot, srcLang, tgtLang string, overwrite bool) (int, int, error) {
	processed := 0
	skipped := 0
	for index, file := range ordered {
		relPath := resolveRelPath(docsRoot, file)
		log.Printf("docs-i18n: [%d/%d] start %s", index+1, len(ordered), relPath)
		start := time.Now()
		skip, err := processFileDoc(ctx, translator, docsRoot, file, srcLang, tgtLang, overwrite)
		if err != nil {
			return processed, skipped, err
		}
		if skip {
			skipped++
			log.Printf("docs-i18n: [%d/%d] skipped %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
		} else {
			processed++
			log.Printf("docs-i18n: [%d/%d] done %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
		}
	}
	return processed, skipped, nil
}

// runDocParallel fans the ordered files out to a pool of workers. Each worker
// creates its own translator, and the first per-job error cancels the shared
// context so the remaining workers stop early.
func runDocParallel(ctx context.Context, ordered []string, docsRoot, srcLang, tgtLang string, overwrite bool, parallel int, glossary []GlossaryEntry, thinking string) (int, int, error) {
	jobs := make(chan docJob)
	// Room for one result per job plus one error per worker, so sends on the
	// error paths never block.
	results := make(chan docResult, len(ordered)+parallel)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	var wg sync.WaitGroup
	for worker := 0; worker < parallel; worker++ {
		wg.Add(1)
		go func(workerID int) {
			defer wg.Done()
			translator, err := NewPiTranslator(srcLang, tgtLang, glossary, thinking)
			if err != nil {
				results <- docResult{err: err}
				return
			}
			defer translator.Close()
			for job := range jobs {
				if ctx.Err() != nil {
					return
				}
				log.Printf("docs-i18n: [w%d %d/%d] start %s", workerID, job.index, len(ordered), job.rel)
				start := time.Now()
				skip, err := processFileDoc(ctx, translator, docsRoot, job.path, srcLang, tgtLang, overwrite)
				results <- docResult{
					index:    job.index,
					rel:      job.rel,
					duration: time.Since(start),
					skipped:  skip,
					err:      err,
				}
				if err != nil {
					cancel()
					return
				}
			}
		}(worker + 1)
	}

	// Feed jobs until the queue is exhausted or a worker cancels the context.
	go func() {
		defer close(jobs)
		for index, file := range ordered {
			job := docJob{index: index + 1, path: file, rel: resolveRelPath(docsRoot, file)}
			select {
			case jobs <- job:
			case <-ctx.Done():
				return
			}
		}
	}()

	processed := 0
	skipped := 0
	for i := 0; i < len(ordered); i++ {
		result := <-results
		if result.err != nil {
			wg.Wait()
			return processed, skipped, result.err
		}
		if result.skipped {
			skipped++
			log.Printf("docs-i18n: [w* %d/%d] skipped %s (%s)", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond))
		} else {
			processed++
			log.Printf("docs-i18n: [w* %d/%d] done %s (%s)", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond))
		}
	}
	wg.Wait()
	return processed, skipped, nil
}

// runSegmentSequential translates the ordered files one at a time in segment
// mode, threading the shared translator and translation memory through processFile.
func runSegmentSequential(ctx context.Context, ordered []string, translator *PiTranslator, tm *TranslationMemory, docsRoot, srcLang, tgtLang string) (int, error) {
	processed := 0
	for index, file := range ordered {
		relPath := resolveRelPath(docsRoot, file)
		log.Printf("docs-i18n: [%d/%d] start %s", index+1, len(ordered), relPath)
		start := time.Now()
		if _, err := processFile(ctx, translator, tm, docsRoot, file, srcLang, tgtLang); err != nil {
			return processed, err
		}
		processed++
		log.Printf("docs-i18n: [%d/%d] done %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
	}
	return processed, nil
}

func resolveRelPath(docsRoot, file string) string {
	relPath := file
	if _, rel, err := resolveDocsPath(docsRoot, file); err == nil {
		relPath = rel
	}
	return relPath
}

// filterDocQueue drops files that shouldSkipDoc reports as already translated
// for the current source hash, returning the pending files and the skipped count.
func filterDocQueue(docsRoot, targetLang string, ordered []string) ([]string, int, error) {
	pending := make([]string, 0, len(ordered))
	skipped := 0
	for _, file := range ordered {
		absPath, relPath, err := resolveDocsPath(docsRoot, file)
		if err != nil {
			return nil, skipped, err
		}
		content, err := os.ReadFile(absPath)
		if err != nil {
			return nil, skipped, err
		}
		sourceHash := hashBytes(content)
		outputPath := filepath.Join(docsRoot, targetLang, relPath)
		skip, err := shouldSkipDoc(outputPath, sourceHash)
		if err != nil {
			return nil, skipped, err
		}
		if skip {
			skipped++
			continue
		}
		pending = append(pending, file)
	}
	return pending, skipped, nil
}