diff --git a/services/gitdiff/highlightdiff.go b/services/gitdiff/highlightdiff.go index e8be063e69..11d5e6dd5a 100644 --- a/services/gitdiff/highlightdiff.go +++ b/services/gitdiff/highlightdiff.go @@ -11,8 +11,38 @@ import ( "github.com/sergi/go-diff/diffmatchpatch" ) -// token is a html tag or entity, eg: "", "", "<" -func extractHTMLToken(s string) (before, token, after string, valid bool) { +// extractDiffTokenRemainingFullTag tries to extract full tag with content from the remaining string +// e.g. for input: "contentthe-rest...", it returns "content", "the-rest...", true +func extractDiffTokenRemainingFullTag(s string) (token, after string, valid bool) { + pos := 0 + for ; pos < len(s); pos++ { + c := s[pos] + if c == '<' { + break + } + // keep in mind: even if we'd like to relax this check, + // we should never ignore "&" because it is for HTML entity and can't be safely used in the diff algorithm, + // because diff between "<" and ">" will generate broken result. + isSymbolChar := 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' || c == '_' || c == '-' + if !isSymbolChar { + return "", s, false + } + } + if pos+1 >= len(s) || s[pos+1] != '/' { + return "", s, false + } + pos2 := strings.IndexByte(s[pos:], '>') + if pos2 == -1 { + return "", s, false + } + return s[:pos+pos2+1], s[pos+pos2+1:], true +} + +// Returned token: +// * full tag with content: "<content>", it is used to optimize diff results to highlight the whole changed symbol +// * opening/close tag: "" or "" +// * HTML entity: "<" +func extractDiffToken(s string) (before, token, after string, valid bool) { for pos1 := 0; pos1 < len(s); pos1++ { switch s[pos1] { case '<': @@ -20,7 +50,15 @@ func extractHTMLToken(s string) (before, token, after string, valid bool) { if pos2 == -1 { return "", "", s, false } - return s[:pos1], s[pos1 : pos1+pos2+1], s[pos1+pos2+1:], true + before, token, after = s[:pos1], s[pos1:pos1+pos2+1], s[pos1+pos2+1:] + + if !strings.HasPrefix(token, "content>`, to optimize diff results + if fullTokenRemaining, fullTokenAfter, ok := extractDiffTokenRemainingFullTag(after); ok { + return before, "<" + token + fullTokenRemaining + ">", fullTokenAfter, true + } + } + return before, token, after, true case '&': pos2 := strings.IndexByte(s[pos1:], ';') if pos2 == -1 { @@ -46,8 +84,6 @@ type highlightCodeDiff struct { tokenPlaceholderMap map[string]rune placeholderOverflowCount int - - lineWrapperTags []string } func newHighlightCodeDiff() *highlightCodeDiff { @@ -88,10 +124,6 @@ func (hcd *highlightCodeDiff) collectUsedRunes(code template.HTML) { } func (hcd *highlightCodeDiff) diffLineWithHighlight(lineType DiffLineType, codeA, codeB template.HTML) template.HTML { - return hcd.diffLineWithHighlightWrapper(nil, lineType, codeA, codeB) -} - -func (hcd *highlightCodeDiff) diffLineWithHighlightWrapper(lineWrapperTags []string, lineType DiffLineType, codeA, codeB template.HTML) template.HTML { hcd.collectUsedRunes(codeA) hcd.collectUsedRunes(codeB) @@ -104,16 +136,13 @@ func (hcd *highlightCodeDiff) diffLineWithHighlightWrapper(lineWrapperTags []str buf := bytes.NewBuffer(nil) - // restore the line wrapper tags and , if necessary - for _, tag := range lineWrapperTags { - buf.WriteString(tag) - } - addedCodePrefix := hcd.registerTokenAsPlaceholder(``) - removedCodePrefix := hcd.registerTokenAsPlaceholder(``) - codeTagSuffix := hcd.registerTokenAsPlaceholder(``) + addedCodeSuffix := hcd.registerTokenAsPlaceholder(``) - if codeTagSuffix != 0 { + removedCodePrefix := hcd.registerTokenAsPlaceholder(``) + removedCodeSuffix := hcd.registerTokenAsPlaceholder(``) + + if removedCodeSuffix != 0 { for _, diff := range diffs { switch { case diff.Type == diffmatchpatch.DiffEqual: @@ -121,11 +150,11 @@ func (hcd *highlightCodeDiff) diffLineWithHighlightWrapper(lineWrapperTags []str case diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd: buf.WriteRune(addedCodePrefix) buf.WriteString(diff.Text) - buf.WriteRune(codeTagSuffix) + buf.WriteRune(addedCodeSuffix) case diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel: buf.WriteRune(removedCodePrefix) buf.WriteString(diff.Text) - buf.WriteRune(codeTagSuffix) + buf.WriteRune(removedCodeSuffix) } } } else { @@ -137,9 +166,6 @@ func (hcd *highlightCodeDiff) diffLineWithHighlightWrapper(lineWrapperTags []str } } } - for range lineWrapperTags { - buf.WriteString("") - } return hcd.recoverOneDiff(buf.String()) } @@ -160,33 +186,26 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlContent template.HTML) s var tagStack []string res := strings.Builder{} - firstRunForLineTags := hcd.lineWrapperTags == nil + htmlCode := strings.TrimSpace(string(htmlContent)) + + // the standard chroma highlight HTML is ` ... ` + // the line wrapper tags should be removed before diff + if strings.HasPrefix(htmlCode, `") + } var beforeToken, token string var valid bool - - htmlCode := string(htmlContent) - // the standard chroma highlight HTML is " ... " for { - beforeToken, token, htmlCode, valid = extractHTMLToken(htmlCode) + beforeToken, token, htmlCode, valid = extractDiffToken(htmlCode) if !valid || token == "" { break } // write the content before the token into result string, and consume the token in the string res.WriteString(beforeToken) - // the line wrapper tags should be removed before diff - if strings.HasPrefix(token, `") - continue - } - var tokenInMap string - if strings.HasSuffix(token, "" for "" tokenInMap = token + "" tagStack = tagStack[:len(tagStack)-1] - } else if token[0] == '<' { // for opening tag - tokenInMap = token - tagStack = append(tagStack, token) - } else if token[0] == '&' { // for html entity + } else if token[0] == '<' { + if token[1] == '<' { + // full tag `<content>`, recover to `content` + tokenInMap = token + } else { + // opening tag + tokenInMap = token + tagStack = append(tagStack, token) + } + } else if token[0] == '&' { // for HTML entity tokenInMap = token } // else: impossible @@ -210,8 +235,13 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlContent template.HTML) s // unfortunately, all private use runes has been exhausted, no more placeholder could be used, no more converting // usually, the exhausting won't occur in real cases, the magnitude of used placeholders is not larger than that of the CSS classes outputted by chroma. hcd.placeholderOverflowCount++ + if strings.HasPrefix(token, "<<") { + pos1 := strings.IndexByte(token, '>') + pos2 := strings.LastIndexByte(token, '<') + res.WriteString(token[pos1+1 : pos2]) // recover to `content` from "<content>" + } if strings.HasPrefix(token, "&") { - // when the token is a html entity, something must be outputted even if there is no placeholder. + // when the token is an HTML entity, something must be outputted even if there is no placeholder. res.WriteRune(0xFFFD) // replacement character TODO: how to handle this case more gracefully? res.WriteString(token[1:]) // still output the entity code part, otherwise there will be no diff result. } @@ -241,10 +271,16 @@ func (hcd *highlightCodeDiff) recoverOneDiff(str string) template.HTML { continue // if no opening tag in stack yet, skip the closing tag } tagStack = tagStack[:len(tagStack)-1] - } else if token[0] == '<' { // for opening tag - tokenToRecover = token - tagStack = append(tagStack, token) - } else if token[0] == '&' { // for html entity + } else if token[0] == '<' { // for HTML tag + if token[1] == '<' { + // full tag `<content>`, recover to `content` + tokenToRecover = token[1 : len(token)-1] + } else { + // opening tag + tokenToRecover = token + tagStack = append(tagStack, token) + } + } else if token[0] == '&' { // for HTML entity tokenToRecover = token } // else: impossible sb.WriteString(tokenToRecover) diff --git a/services/gitdiff/highlightdiff_test.go b/services/gitdiff/highlightdiff_test.go index 0df2e29d13..573177f401 100644 --- a/services/gitdiff/highlightdiff_test.go +++ b/services/gitdiff/highlightdiff_test.go @@ -9,6 +9,8 @@ import ( "strings" "testing" + "code.gitea.io/gitea/modules/highlight" + "github.com/stretchr/testify/assert" ) @@ -18,9 +20,9 @@ func TestDiffWithHighlight(t *testing.T) { codeA := template.HTML(`x foo y`) codeB := template.HTML(`x bar y`) outDel := hcd.diffLineWithHighlight(DiffLineDel, codeA, codeB) - assert.Equal(t, `x foo y`, string(outDel)) + assert.Equal(t, `x foo y`, string(outDel)) outAdd := hcd.diffLineWithHighlight(DiffLineAdd, codeA, codeB) - assert.Equal(t, `x bar y`, string(outAdd)) + assert.Equal(t, `x bar y`, string(outAdd)) }) t.Run("CleanUp", func(t *testing.T) { @@ -40,6 +42,14 @@ func TestDiffWithHighlight(t *testing.T) { assert.Equal(t, "", string(hcd.recoverOneDiff("O"))) assert.Empty(t, string(hcd.recoverOneDiff("C"))) }) + + t.Run("ComplexDiff", func(t *testing.T) { + oldCode, _ := highlight.RenderCodeFast("a.go", "Go", `xxx || yyy`) + newCode, _ := highlight.RenderCodeFast("a.go", "Go", `bot.xxx || bot.yyy`) + hcd := newHighlightCodeDiff() + out := hcd.diffLineWithHighlight(DiffLineAdd, oldCode, newCode) + assert.Equal(t, `bot.xxx || bot.yyy`, string(out)) + }) } func TestDiffWithHighlightPlaceholder(t *testing.T) { @@ -64,6 +74,11 @@ func TestDiffWithHighlightPlaceholderExhausted(t *testing.T) { assert.Equal(t, placeHolderAmp+"lt;", string(output)) output = hcd.diffLineWithHighlight(DiffLineAdd, `<`, `>`) assert.Equal(t, placeHolderAmp+"gt;", string(output)) + + output = hcd.diffLineWithHighlight(DiffLineDel, `foo`, `bar`) + assert.Equal(t, "foo", string(output)) + output = hcd.diffLineWithHighlight(DiffLineAdd, `foo`, `bar`) + assert.Equal(t, "bar", string(output)) } func TestDiffWithHighlightTagMatch(t *testing.T) {