Files
openclaw/scripts/docs-i18n/markdown_segments.go
Josh Palmer 0e0e395b9e Docs: add zh-CN entrypoint translations (#6300)
* Docs: add zh-CN entrypoint translations

* Docs: harden docs-i18n parsing
2026-02-01 15:22:05 +01:00

132 lines
2.8 KiB
Go

package main
import (
"sort"
"strings"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/text"
)
func extractSegments(body, relPath string) ([]Segment, error) {
source := []byte(body)
r := text.NewReader(source)
md := goldmark.New(
goldmark.WithExtensions(extension.GFM),
)
doc := md.Parser().Parse(r)
segments := make([]Segment, 0, 128)
skipDepth := 0
var lastBlock ast.Node
err := ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
switch n.(type) {
case *ast.CodeBlock, *ast.FencedCodeBlock, *ast.CodeSpan, *ast.HTMLBlock, *ast.RawHTML:
if entering {
skipDepth++
} else {
skipDepth--
}
return ast.WalkContinue, nil
}
if !entering || skipDepth > 0 {
return ast.WalkContinue, nil
}
textNode, ok := n.(*ast.Text)
if !ok {
return ast.WalkContinue, nil
}
block := blockParent(textNode)
if block == nil {
return ast.WalkContinue, nil
}
textValue := string(textNode.Segment.Value(source))
if strings.TrimSpace(textValue) == "" {
return ast.WalkContinue, nil
}
start := textNode.Segment.Start
stop := textNode.Segment.Stop
if len(segments) > 0 && lastBlock == block {
last := &segments[len(segments)-1]
gap := string(source[last.Stop:start])
if strings.TrimSpace(gap) == "" {
last.Stop = stop
return ast.WalkContinue, nil
}
}
segments = append(segments, Segment{Start: start, Stop: stop})
lastBlock = block
return ast.WalkContinue, nil
})
if err != nil {
return nil, err
}
filtered := make([]Segment, 0, len(segments))
for _, seg := range segments {
textValue := string(source[seg.Start:seg.Stop])
trimmed := strings.TrimSpace(textValue)
if trimmed == "" {
continue
}
textHash := hashText(textValue)
segmentID := segmentID(relPath, textHash)
filtered = append(filtered, Segment{
Start: seg.Start,
Stop: seg.Stop,
Text: textValue,
TextHash: textHash,
SegmentID: segmentID,
})
}
sort.Slice(filtered, func(i, j int) bool {
return filtered[i].Start < filtered[j].Start
})
return filtered, nil
}
func blockParent(n ast.Node) ast.Node {
for node := n.Parent(); node != nil; node = node.Parent() {
if isTranslatableBlock(node) {
return node
}
}
return nil
}
func isTranslatableBlock(n ast.Node) bool {
switch n.(type) {
case *ast.Paragraph, *ast.Heading, *ast.ListItem:
return true
default:
return false
}
}
func applyTranslations(body string, segments []Segment) string {
if len(segments) == 0 {
return body
}
var out strings.Builder
last := 0
for _, seg := range segments {
if seg.Start < last {
continue
}
out.WriteString(body[last:seg.Start])
out.WriteString(seg.Translated)
last = seg.Stop
}
out.WriteString(body[last:])
return out.String()
}