mirror of
https://github.com/openclaw/openclaw.git
synced 2026-02-09 05:19:32 +08:00
274 lines
7.5 KiB
Go
274 lines
7.5 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"flag"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"path/filepath"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// docJob is one unit of work handed to a doc-mode worker goroutine.
type docJob struct {
	// index is the job's position in the queue as shown in progress logs.
	index int
	// path is the file to translate; rel is the path printed in log lines.
	path, rel string
}
|
|
|
|
// docResult carries the outcome of one docJob from a worker back to the
// goroutine that collects results.
type docResult struct {
	// index mirrors the index of the job that produced this result.
	index int
	// rel mirrors the job's log-friendly path.
	rel string
	// duration is how long processing the file took.
	duration time.Duration
	// skipped reports that the file was skipped rather than translated.
	skipped bool
	// err is non-nil when the worker failed; the other fields may be unset.
	err error
}
|
|
|
|
func main() {
|
|
var (
|
|
targetLang = flag.String("lang", "zh-CN", "target language (e.g., zh-CN)")
|
|
sourceLang = flag.String("src", "en", "source language")
|
|
docsRoot = flag.String("docs", "docs", "docs root")
|
|
tmPath = flag.String("tm", "", "translation memory path")
|
|
mode = flag.String("mode", "segment", "translation mode (segment|doc)")
|
|
thinking = flag.String("thinking", "high", "thinking level (low|high)")
|
|
overwrite = flag.Bool("overwrite", false, "overwrite existing translations")
|
|
maxFiles = flag.Int("max", 0, "max files to process (0 = all)")
|
|
parallel = flag.Int("parallel", 1, "parallel workers for doc mode")
|
|
)
|
|
flag.Parse()
|
|
files := flag.Args()
|
|
if len(files) == 0 {
|
|
fatal(fmt.Errorf("no doc files provided"))
|
|
}
|
|
|
|
resolvedDocsRoot, err := filepath.Abs(*docsRoot)
|
|
if err != nil {
|
|
fatal(err)
|
|
}
|
|
|
|
if *tmPath == "" {
|
|
*tmPath = filepath.Join(resolvedDocsRoot, ".i18n", fmt.Sprintf("%s.tm.jsonl", *targetLang))
|
|
}
|
|
|
|
glossaryPath := filepath.Join(resolvedDocsRoot, ".i18n", fmt.Sprintf("glossary.%s.json", *targetLang))
|
|
glossary, err := LoadGlossary(glossaryPath)
|
|
if err != nil {
|
|
fatal(err)
|
|
}
|
|
|
|
translator, err := NewPiTranslator(*sourceLang, *targetLang, glossary, *thinking)
|
|
if err != nil {
|
|
fatal(err)
|
|
}
|
|
defer translator.Close()
|
|
|
|
tm, err := LoadTranslationMemory(*tmPath)
|
|
if err != nil {
|
|
fatal(err)
|
|
}
|
|
|
|
ordered, err := orderFiles(resolvedDocsRoot, files)
|
|
if err != nil {
|
|
fatal(err)
|
|
}
|
|
totalFiles := len(ordered)
|
|
preSkipped := 0
|
|
if *mode == "doc" && !*overwrite {
|
|
filtered, skipped, err := filterDocQueue(resolvedDocsRoot, *targetLang, ordered)
|
|
if err != nil {
|
|
fatal(err)
|
|
}
|
|
ordered = filtered
|
|
preSkipped = skipped
|
|
}
|
|
if *maxFiles > 0 && *maxFiles < len(ordered) {
|
|
ordered = ordered[:*maxFiles]
|
|
}
|
|
|
|
log.SetFlags(log.LstdFlags)
|
|
start := time.Now()
|
|
processed := 0
|
|
skipped := 0
|
|
|
|
if *parallel < 1 {
|
|
*parallel = 1
|
|
}
|
|
|
|
log.Printf("docs-i18n: mode=%s total=%d pending=%d pre_skipped=%d overwrite=%t thinking=%s parallel=%d", *mode, totalFiles, len(ordered), preSkipped, *overwrite, *thinking, *parallel)
|
|
switch *mode {
|
|
case "doc":
|
|
if *parallel > 1 {
|
|
proc, skip, err := runDocParallel(context.Background(), ordered, resolvedDocsRoot, *sourceLang, *targetLang, *overwrite, *parallel, glossary, *thinking)
|
|
if err != nil {
|
|
fatal(err)
|
|
}
|
|
processed += proc
|
|
skipped += skip
|
|
} else {
|
|
proc, skip, err := runDocSequential(context.Background(), ordered, translator, resolvedDocsRoot, *sourceLang, *targetLang, *overwrite)
|
|
if err != nil {
|
|
fatal(err)
|
|
}
|
|
processed += proc
|
|
skipped += skip
|
|
}
|
|
case "segment":
|
|
if *parallel > 1 {
|
|
fatal(fmt.Errorf("parallel processing is only supported in doc mode"))
|
|
}
|
|
proc, err := runSegmentSequential(context.Background(), ordered, translator, tm, resolvedDocsRoot, *sourceLang, *targetLang)
|
|
if err != nil {
|
|
fatal(err)
|
|
}
|
|
processed += proc
|
|
default:
|
|
fatal(fmt.Errorf("unknown mode: %s", *mode))
|
|
}
|
|
|
|
if err := tm.Save(); err != nil {
|
|
fatal(err)
|
|
}
|
|
elapsed := time.Since(start).Round(time.Millisecond)
|
|
log.Printf("docs-i18n: completed processed=%d skipped=%d elapsed=%s", processed, skipped, elapsed)
|
|
}
|
|
|
|
func runDocSequential(ctx context.Context, ordered []string, translator *PiTranslator, docsRoot, srcLang, tgtLang string, overwrite bool) (int, int, error) {
|
|
processed := 0
|
|
skipped := 0
|
|
for index, file := range ordered {
|
|
relPath := resolveRelPath(docsRoot, file)
|
|
log.Printf("docs-i18n: [%d/%d] start %s", index+1, len(ordered), relPath)
|
|
start := time.Now()
|
|
skip, err := processFileDoc(ctx, translator, docsRoot, file, srcLang, tgtLang, overwrite)
|
|
if err != nil {
|
|
return processed, skipped, err
|
|
}
|
|
if skip {
|
|
skipped++
|
|
log.Printf("docs-i18n: [%d/%d] skipped %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
|
|
} else {
|
|
processed++
|
|
log.Printf("docs-i18n: [%d/%d] done %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
|
|
}
|
|
}
|
|
return processed, skipped, nil
|
|
}
|
|
|
|
func runDocParallel(ctx context.Context, ordered []string, docsRoot, srcLang, tgtLang string, overwrite bool, parallel int, glossary []GlossaryEntry, thinking string) (int, int, error) {
|
|
jobs := make(chan docJob)
|
|
results := make(chan docResult, len(ordered))
|
|
ctx, cancel := context.WithCancel(ctx)
|
|
defer cancel()
|
|
|
|
var wg sync.WaitGroup
|
|
for worker := 0; worker < parallel; worker++ {
|
|
wg.Add(1)
|
|
go func(workerID int) {
|
|
defer wg.Done()
|
|
translator, err := NewPiTranslator(srcLang, tgtLang, glossary, thinking)
|
|
if err != nil {
|
|
results <- docResult{err: err}
|
|
return
|
|
}
|
|
defer translator.Close()
|
|
for job := range jobs {
|
|
if ctx.Err() != nil {
|
|
return
|
|
}
|
|
log.Printf("docs-i18n: [w%d %d/%d] start %s", workerID, job.index, len(ordered), job.rel)
|
|
start := time.Now()
|
|
skip, err := processFileDoc(ctx, translator, docsRoot, job.path, srcLang, tgtLang, overwrite)
|
|
results <- docResult{
|
|
index: job.index,
|
|
rel: job.rel,
|
|
duration: time.Since(start),
|
|
skipped: skip,
|
|
err: err,
|
|
}
|
|
if err != nil {
|
|
cancel()
|
|
return
|
|
}
|
|
}
|
|
}(worker + 1)
|
|
}
|
|
|
|
go func() {
|
|
for index, file := range ordered {
|
|
jobs <- docJob{index: index + 1, path: file, rel: resolveRelPath(docsRoot, file)}
|
|
}
|
|
close(jobs)
|
|
}()
|
|
|
|
processed := 0
|
|
skipped := 0
|
|
for i := 0; i < len(ordered); i++ {
|
|
result := <-results
|
|
if result.err != nil {
|
|
wg.Wait()
|
|
return processed, skipped, result.err
|
|
}
|
|
if result.skipped {
|
|
skipped++
|
|
log.Printf("docs-i18n: [w* %d/%d] skipped %s (%s)", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond))
|
|
} else {
|
|
processed++
|
|
log.Printf("docs-i18n: [w* %d/%d] done %s (%s)", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond))
|
|
}
|
|
}
|
|
wg.Wait()
|
|
return processed, skipped, nil
|
|
}
|
|
|
|
func runSegmentSequential(ctx context.Context, ordered []string, translator *PiTranslator, tm *TranslationMemory, docsRoot, srcLang, tgtLang string) (int, error) {
|
|
processed := 0
|
|
for index, file := range ordered {
|
|
relPath := resolveRelPath(docsRoot, file)
|
|
log.Printf("docs-i18n: [%d/%d] start %s", index+1, len(ordered), relPath)
|
|
start := time.Now()
|
|
if _, err := processFile(ctx, translator, tm, docsRoot, file, srcLang, tgtLang); err != nil {
|
|
return processed, err
|
|
}
|
|
processed++
|
|
log.Printf("docs-i18n: [%d/%d] done %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
|
|
}
|
|
return processed, nil
|
|
}
|
|
|
|
func resolveRelPath(docsRoot, file string) string {
|
|
relPath := file
|
|
if _, rel, err := resolveDocsPath(docsRoot, file); err == nil {
|
|
relPath = rel
|
|
}
|
|
return relPath
|
|
}
|
|
|
|
func filterDocQueue(docsRoot, targetLang string, ordered []string) ([]string, int, error) {
|
|
pending := make([]string, 0, len(ordered))
|
|
skipped := 0
|
|
for _, file := range ordered {
|
|
absPath, relPath, err := resolveDocsPath(docsRoot, file)
|
|
if err != nil {
|
|
return nil, skipped, err
|
|
}
|
|
content, err := os.ReadFile(absPath)
|
|
if err != nil {
|
|
return nil, skipped, err
|
|
}
|
|
sourceHash := hashBytes(content)
|
|
outputPath := filepath.Join(docsRoot, targetLang, relPath)
|
|
skip, err := shouldSkipDoc(outputPath, sourceHash)
|
|
if err != nil {
|
|
return nil, skipped, err
|
|
}
|
|
if skip {
|
|
skipped++
|
|
continue
|
|
}
|
|
pending = append(pending, file)
|
|
}
|
|
return pending, skipped, nil
|
|
}
|