Switch Unicode Escaping to a VSCode-like system (#19990)

This PR rewrites the invisible unicode detection algorithm to more
closely match that of the Monaco editor on the system. It provides a
technique for detecting ambiguous characters and relaxes the detection
of combining marks.

Control characters are in addition detected as invisible in this
implementation whereas they are not on monaco but this is related to
font issues.

Close #19913

Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
zeripath 2022-08-13 19:32:34 +01:00 committed by GitHub
parent 11dc6df5be
commit 99efa02edf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
29 changed files with 2107 additions and 371 deletions

View file

@ -0,0 +1,54 @@
// This file is generated by modules/charset/ambiguous/generate.go DO NOT EDIT
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package charset
import (
"sort"
"strings"
"unicode"
"code.gitea.io/gitea/modules/translation"
)
// AmbiguousTablesForLocale provides the table of ambiguous characters for this locale.
func AmbiguousTablesForLocale(locale translation.Locale) []*AmbiguousTable {
key := locale.Language()
var table *AmbiguousTable
var ok bool
for len(key) > 0 {
if table, ok = AmbiguousCharacters[key]; ok {
break
}
idx := strings.LastIndexAny(key, "-_")
if idx < 0 {
key = ""
} else {
key = key[:idx]
}
}
if table == nil {
table = AmbiguousCharacters["_default"]
}
return []*AmbiguousTable{
table,
AmbiguousCharacters["_common"],
}
}
func isAmbiguous(r rune, confusableTo *rune, tables ...*AmbiguousTable) bool {
for _, table := range tables {
if !unicode.Is(table.RangeTable, r) {
continue
}
i := sort.Search(len(table.Confusable), func(i int) bool {
return table.Confusable[i] >= r
})
(*confusableTo) = table.With[i]
return true
}
return false
}

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,178 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
import (
"bytes"
"flag"
"fmt"
"go/format"
"os"
"sort"
"text/template"
"unicode"
"code.gitea.io/gitea/modules/json"
"golang.org/x/text/unicode/rangetable"
)
// ambiguous.json provides a one to one mapping of ambiguous characters to other characters
// See https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json
type AmbiguousTable struct {
Confusable []rune
With []rune
Locale string
RangeTable *unicode.RangeTable
}
type RunePair struct {
Confusable rune
With rune
}
var verbose bool
func main() {
flag.Usage = func() {
fmt.Fprintf(os.Stderr, `%s: Generate AmbiguousCharacter
Usage: %[1]s [-v] [-o output.go] ambiguous.json
`, os.Args[0])
flag.PrintDefaults()
}
output := ""
flag.BoolVar(&verbose, "v", false, "verbose output")
flag.StringVar(&output, "o", "ambiguous_gen.go", "file to output to")
flag.Parse()
input := flag.Arg(0)
if input == "" {
input = "ambiguous.json"
}
bs, err := os.ReadFile(input)
if err != nil {
fatalf("Unable to read: %s Err: %v", input, err)
}
var unwrapped string
if err := json.Unmarshal(bs, &unwrapped); err != nil {
fatalf("Unable to unwrap content in: %s Err: %v", input, err)
}
fromJSON := map[string][]uint32{}
if err := json.Unmarshal([]byte(unwrapped), &fromJSON); err != nil {
fatalf("Unable to unmarshal content in: %s Err: %v", input, err)
}
tables := make([]*AmbiguousTable, 0, len(fromJSON))
for locale, chars := range fromJSON {
table := &AmbiguousTable{Locale: locale}
table.Confusable = make([]rune, 0, len(chars)/2)
table.With = make([]rune, 0, len(chars)/2)
pairs := make([]RunePair, len(chars)/2)
for i := 0; i < len(chars); i += 2 {
pairs[i/2].Confusable, pairs[i/2].With = rune(chars[i]), rune(chars[i+1])
}
sort.Slice(pairs, func(i, j int) bool {
return pairs[i].Confusable < pairs[j].Confusable
})
for _, pair := range pairs {
table.Confusable = append(table.Confusable, pair.Confusable)
table.With = append(table.With, pair.With)
}
table.RangeTable = rangetable.New(table.Confusable...)
tables = append(tables, table)
}
sort.Slice(tables, func(i, j int) bool {
return tables[i].Locale < tables[j].Locale
})
data := map[string]interface{}{
"Tables": tables,
}
if err := runTemplate(generatorTemplate, output, &data); err != nil {
fatalf("Unable to run template: %v", err)
}
}
func runTemplate(t *template.Template, filename string, data interface{}) error {
buf := bytes.NewBuffer(nil)
if err := t.Execute(buf, data); err != nil {
return fmt.Errorf("unable to execute template: %w", err)
}
bs, err := format.Source(buf.Bytes())
if err != nil {
verbosef("Bad source:\n%s", buf.String())
return fmt.Errorf("unable to format source: %w", err)
}
file, err := os.Create(filename)
if err != nil {
return fmt.Errorf("failed to create file %s because %w", filename, err)
}
defer file.Close()
_, err = file.Write(bs)
if err != nil {
return fmt.Errorf("unable to write generated source: %w", err)
}
return nil
}
var generatorTemplate = template.Must(template.New("ambiguousTemplate").Parse(`// This file is generated by modules/charset/ambiguous/generate.go DO NOT EDIT
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package charset
import "unicode"
// This file is generated from https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json
// AmbiguousTable matches a confusable rune with its partner for the Locale
type AmbiguousTable struct {
Confusable []rune
With []rune
Locale string
RangeTable *unicode.RangeTable
}
// AmbiguousCharacters provides a map by locale name to the confusable characters in that locale
var AmbiguousCharacters = map[string]*AmbiguousTable{
{{range .Tables}}{{printf "%q:" .Locale}} {
Confusable: []rune{ {{range .Confusable}}{{.}},{{end}} },
With: []rune{ {{range .With}}{{.}},{{end}} },
Locale: {{printf "%q" .Locale}},
RangeTable: &unicode.RangeTable{
R16: []unicode.Range16{
{{range .RangeTable.R16 }} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}},
{{end}} },
R32: []unicode.Range32{
{{range .RangeTable.R32}} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}},
{{end}} },
LatinOffset: {{.RangeTable.LatinOffset}},
},
},
{{end}}
}
`))
func logf(format string, args ...interface{}) {
fmt.Fprintf(os.Stderr, format+"\n", args...)
}
func verbosef(format string, args ...interface{}) {
if verbose {
logf(format, args...)
}
}
func fatalf(format string, args ...interface{}) {
logf("fatal: "+format+"\n", args...)
os.Exit(1)
}

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,32 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package charset
import (
"sort"
"testing"
"unicode"
"github.com/stretchr/testify/assert"
)
func TestAmbiguousCharacters(t *testing.T) {
for locale, ambiguous := range AmbiguousCharacters {
assert.Equal(t, locale, ambiguous.Locale)
assert.Equal(t, len(ambiguous.Confusable), len(ambiguous.With))
assert.True(t, sort.SliceIsSorted(ambiguous.Confusable, func(i, j int) bool {
return ambiguous.Confusable[i] < ambiguous.Confusable[j]
}))
for _, confusable := range ambiguous.Confusable {
assert.True(t, unicode.Is(ambiguous.RangeTable, confusable))
i := sort.Search(len(ambiguous.Confusable), func(j int) bool {
return ambiguous.Confusable[j] >= confusable
})
found := i < len(ambiguous.Confusable) && ambiguous.Confusable[i] == confusable
assert.True(t, found, "%c is not in %d", confusable, i)
}
}
}

View file

@ -0,0 +1,44 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package charset
import (
"bytes"
"io"
)
// BreakWriter wraps an io.Writer to always write '\n' as '<br>'
type BreakWriter struct {
io.Writer
}
// Write writes the provided byte slice transparently replacing '\n' with '<br>'
func (b *BreakWriter) Write(bs []byte) (n int, err error) {
pos := 0
for pos < len(bs) {
idx := bytes.IndexByte(bs[pos:], '\n')
if idx < 0 {
wn, err := b.Writer.Write(bs[pos:])
return n + wn, err
}
if idx > 0 {
wn, err := b.Writer.Write(bs[pos : pos+idx])
n += wn
if err != nil {
return n, err
}
}
if _, err = b.Writer.Write([]byte("<br>")); err != nil {
return n, err
}
pos += idx + 1
n++
}
return n, err
}

View file

@ -0,0 +1,69 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package charset
import (
"strings"
"testing"
)
func TestBreakWriter_Write(t *testing.T) {
tests := []struct {
name string
kase string
expect string
wantErr bool
}{
{
name: "noline",
kase: "abcdefghijklmnopqrstuvwxyz",
expect: "abcdefghijklmnopqrstuvwxyz",
},
{
name: "endline",
kase: "abcdefghijklmnopqrstuvwxyz\n",
expect: "abcdefghijklmnopqrstuvwxyz<br>",
},
{
name: "startline",
kase: "\nabcdefghijklmnopqrstuvwxyz",
expect: "<br>abcdefghijklmnopqrstuvwxyz",
},
{
name: "onlyline",
kase: "\n\n\n",
expect: "<br><br><br>",
},
{
name: "empty",
kase: "",
expect: "",
},
{
name: "midline",
kase: "\nabc\ndefghijkl\nmnopqrstuvwxy\nz",
expect: "<br>abc<br>defghijkl<br>mnopqrstuvwxy<br>z",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
buf := &strings.Builder{}
b := &BreakWriter{
Writer: buf,
}
n, err := b.Write([]byte(tt.kase))
if (err != nil) != tt.wantErr {
t.Errorf("BreakWriter.Write() error = %v, wantErr %v", err, tt.wantErr)
return
}
if n != len(tt.kase) {
t.Errorf("BreakWriter.Write() = %v, want %v", n, len(tt.kase))
}
if buf.String() != tt.expect {
t.Errorf("BreakWriter.Write() wrote %q, want %v", buf.String(), tt.expect)
}
})
}
}

View file

@ -1,236 +1,58 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
//go:generate go run invisible/generate.go -v -o ./invisible_gen.go
//go:generate go run ambiguous/generate.go -v -o ./ambiguous_gen.go ambiguous/ambiguous.json
package charset
import (
"bytes"
"fmt"
"io"
"strings"
"unicode"
"unicode/utf8"
"golang.org/x/text/unicode/bidi"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/translation"
)
// EscapeStatus represents the findings of the unicode escaper
type EscapeStatus struct {
Escaped bool
HasError bool
HasBadRunes bool
HasControls bool
HasSpaces bool
HasMarks bool
HasBIDI bool
BadBIDI bool
HasRTLScript bool
HasLTRScript bool
// RuneNBSP is the codepoint for NBSP
const RuneNBSP = 0xa0
// EscapeControlHTML escapes the unicode control sequences in a provided html document
func EscapeControlHTML(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) {
sb := &strings.Builder{}
outputStream := &HTMLStreamerWriter{Writer: sb}
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
if err := StreamHTML(strings.NewReader(text), streamer); err != nil {
streamer.escaped.HasError = true
log.Error("Error whilst escaping: %v", err)
}
return streamer.escaped, sb.String()
}
// Or combines two EscapeStatus structs into one representing the conjunction of the two
func (status EscapeStatus) Or(other EscapeStatus) EscapeStatus {
st := status
st.Escaped = st.Escaped || other.Escaped
st.HasError = st.HasError || other.HasError
st.HasBadRunes = st.HasBadRunes || other.HasBadRunes
st.HasControls = st.HasControls || other.HasControls
st.HasSpaces = st.HasSpaces || other.HasSpaces
st.HasMarks = st.HasMarks || other.HasMarks
st.HasBIDI = st.HasBIDI || other.HasBIDI
st.BadBIDI = st.BadBIDI || other.BadBIDI
st.HasRTLScript = st.HasRTLScript || other.HasRTLScript
st.HasLTRScript = st.HasLTRScript || other.HasLTRScript
return st
// EscapeControlReaders escapes the unicode control sequences in a provider reader and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte
func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) {
outputStream := &HTMLStreamerWriter{Writer: writer}
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
if err = StreamHTML(reader, streamer); err != nil {
streamer.escaped.HasError = true
log.Error("Error whilst escaping: %v", err)
}
return streamer.escaped, err
}
// EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string
func EscapeControlString(text string) (EscapeStatus, string) {
func EscapeControlString(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) {
sb := &strings.Builder{}
escaped, _ := EscapeControlReader(strings.NewReader(text), sb)
return escaped, sb.String()
}
outputStream := &HTMLStreamerWriter{Writer: sb}
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
// EscapeControlBytes escapes the unicode control sequences a provided []byte and returns the findings as an EscapeStatus and the escaped []byte
func EscapeControlBytes(text []byte) (EscapeStatus, []byte) {
buf := &bytes.Buffer{}
escaped, _ := EscapeControlReader(bytes.NewReader(text), buf)
return escaped, buf.Bytes()
}
// EscapeControlReader escapes the unicode control sequences a provided Reader writing the escaped output to the output and returns the findings as an EscapeStatus and an error
func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) {
buf := make([]byte, 4096)
readStart := 0
runeCount := 0
var n int
var writePos int
lineHasBIDI := false
lineHasRTLScript := false
lineHasLTRScript := false
readingloop:
for err == nil {
n, err = text.Read(buf[readStart:])
bs := buf[:n+readStart]
n = len(bs)
i := 0
for i < len(bs) {
r, size := utf8.DecodeRune(bs[i:])
runeCount++
// Now handle the codepoints
switch {
case r == utf8.RuneError:
if writePos < i {
if _, err = output.Write(bs[writePos:i]); err != nil {
escaped.HasError = true
return
}
writePos = i
}
// runes can be at most 4 bytes - so...
if len(bs)-i <= 3 {
// if not request more data
copy(buf, bs[i:])
readStart = n - i
writePos = 0
continue readingloop
}
// this is a real broken rune
escaped.HasBadRunes = true
escaped.Escaped = true
if err = writeBroken(output, bs[i:i+size]); err != nil {
escaped.HasError = true
return
}
writePos += size
case r == '\n':
if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript {
escaped.BadBIDI = true
}
lineHasBIDI = false
lineHasRTLScript = false
lineHasLTRScript = false
case runeCount == 1 && r == 0xFEFF: // UTF BOM
// the first BOM is safe
case r == '\r' || r == '\t' || r == ' ':
// These are acceptable control characters and space characters
case unicode.IsSpace(r):
escaped.HasSpaces = true
escaped.Escaped = true
if writePos < i {
if _, err = output.Write(bs[writePos:i]); err != nil {
escaped.HasError = true
return
}
}
if err = writeEscaped(output, r); err != nil {
escaped.HasError = true
return
}
writePos = i + size
case unicode.Is(unicode.Bidi_Control, r):
escaped.Escaped = true
escaped.HasBIDI = true
if writePos < i {
if _, err = output.Write(bs[writePos:i]); err != nil {
escaped.HasError = true
return
}
}
lineHasBIDI = true
if err = writeEscaped(output, r); err != nil {
escaped.HasError = true
return
}
writePos = i + size
case unicode.Is(unicode.C, r):
escaped.Escaped = true
escaped.HasControls = true
if writePos < i {
if _, err = output.Write(bs[writePos:i]); err != nil {
escaped.HasError = true
return
}
}
if err = writeEscaped(output, r); err != nil {
escaped.HasError = true
return
}
writePos = i + size
case unicode.Is(unicode.M, r):
escaped.Escaped = true
escaped.HasMarks = true
if writePos < i {
if _, err = output.Write(bs[writePos:i]); err != nil {
escaped.HasError = true
return
}
}
if err = writeEscaped(output, r); err != nil {
escaped.HasError = true
return
}
writePos = i + size
default:
p, _ := bidi.Lookup(bs[i : i+size])
c := p.Class()
if c == bidi.R || c == bidi.AL {
lineHasRTLScript = true
escaped.HasRTLScript = true
} else if c == bidi.L {
lineHasLTRScript = true
escaped.HasLTRScript = true
}
}
i += size
}
if n > 0 {
// we read something...
// write everything unwritten
if writePos < i {
if _, err = output.Write(bs[writePos:i]); err != nil {
escaped.HasError = true
return
}
}
// reset the starting positions for the next read
readStart = 0
writePos = 0
}
if err := streamer.Text(text); err != nil {
streamer.escaped.HasError = true
log.Error("Error whilst escaping: %v", err)
}
if readStart > 0 {
// this means that there is an incomplete or broken rune at 0-readStart and we read nothing on the last go round
escaped.Escaped = true
escaped.HasBadRunes = true
if err = writeBroken(output, buf[:readStart]); err != nil {
escaped.HasError = true
return
}
}
if err == io.EOF {
if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript {
escaped.BadBIDI = true
}
err = nil
return
}
escaped.HasError = true
return escaped, err
}
func writeBroken(output io.Writer, bs []byte) (err error) {
_, err = fmt.Fprintf(output, `<span class="broken-code-point">&lt;%X&gt;</span>`, bs)
return err
}
func writeEscaped(output io.Writer, r rune) (err error) {
_, err = fmt.Fprintf(output, `<span class="escaped-code-point" data-escaped="[U+%04X]"><span class="char">%c</span></span>`, r, r)
return err
return streamer.escaped, sb.String()
}

View file

@ -0,0 +1,28 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package charset
// EscapeStatus represents the findings of the unicode escaper
type EscapeStatus struct {
Escaped bool
HasError bool
HasBadRunes bool
HasInvisible bool
HasAmbiguous bool
}
// Or combines two EscapeStatus structs into one representing the conjunction of the two
func (status *EscapeStatus) Or(other *EscapeStatus) *EscapeStatus {
st := status
if status == nil {
st = &EscapeStatus{}
}
st.Escaped = st.Escaped || other.Escaped
st.HasError = st.HasError || other.HasError
st.HasBadRunes = st.HasBadRunes || other.HasBadRunes
st.HasAmbiguous = st.HasAmbiguous || other.HasAmbiguous
st.HasInvisible = st.HasInvisible || other.HasInvisible
return st
}

View file

@ -0,0 +1,297 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package charset
import (
"fmt"
"regexp"
"sort"
"strings"
"unicode"
"unicode/utf8"
"code.gitea.io/gitea/modules/translation"
"golang.org/x/net/html"
)
// VScode defaultWordRegexp
var defaultWordRegexp = regexp.MustCompile(`(-?\d*\.\d\w*)|([^\` + "`" + `\~\!\@\#\$\%\^\&\*\(\)\-\=\+\[\{\]\}\\\|\;\:\'\"\,\.\<\>\/\?\s\x00-\x1f]+)`)
func NewEscapeStreamer(locale translation.Locale, next HTMLStreamer, allowed ...rune) HTMLStreamer {
return &escapeStreamer{
escaped: &EscapeStatus{},
PassthroughHTMLStreamer: *NewPassthroughStreamer(next),
locale: locale,
ambiguousTables: AmbiguousTablesForLocale(locale),
allowed: allowed,
}
}
type escapeStreamer struct {
PassthroughHTMLStreamer
escaped *EscapeStatus
locale translation.Locale
ambiguousTables []*AmbiguousTable
allowed []rune
}
func (e *escapeStreamer) EscapeStatus() *EscapeStatus {
return e.escaped
}
// Text tells the next streamer there is a text
func (e *escapeStreamer) Text(data string) error {
sb := &strings.Builder{}
pos, until, next := 0, 0, 0
if len(data) > len(UTF8BOM) && data[:len(UTF8BOM)] == string(UTF8BOM) {
_, _ = sb.WriteString(data[:len(UTF8BOM)])
pos = len(UTF8BOM)
}
for pos < len(data) {
nextIdxs := defaultWordRegexp.FindStringIndex(data[pos:])
if nextIdxs == nil {
until = len(data)
next = until
} else {
until, next = nextIdxs[0]+pos, nextIdxs[1]+pos
}
// from pos until until we know that the runes are not \r\t\n or even ' '
runes := make([]rune, 0, next-until)
positions := make([]int, 0, next-until+1)
for pos < until {
r, sz := utf8.DecodeRune([]byte(data)[pos:])
positions = positions[:0]
positions = append(positions, pos, pos+sz)
types, confusables, _ := e.runeTypes(r)
if err := e.handleRunes(data, []rune{r}, positions, types, confusables, sb); err != nil {
return err
}
pos += sz
}
for i := pos; i < next; {
r, sz := utf8.DecodeRune([]byte(data)[i:])
runes = append(runes, r)
positions = append(positions, i)
i += sz
}
positions = append(positions, next)
types, confusables, runeCounts := e.runeTypes(runes...)
if runeCounts.needsEscape() {
if err := e.handleRunes(data, runes, positions, types, confusables, sb); err != nil {
return err
}
} else {
_, _ = sb.Write([]byte(data)[pos:next])
}
pos = next
}
if sb.Len() > 0 {
if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil {
return err
}
}
return nil
}
func (e *escapeStreamer) handleRunes(data string, runes []rune, positions []int, types []runeType, confusables []rune, sb *strings.Builder) error {
for i, r := range runes {
switch types[i] {
case brokenRuneType:
if sb.Len() > 0 {
if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil {
return err
}
sb.Reset()
}
end := positions[i+1]
start := positions[i]
if err := e.brokenRune([]byte(data)[start:end]); err != nil {
return err
}
case ambiguousRuneType:
if sb.Len() > 0 {
if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil {
return err
}
sb.Reset()
}
if err := e.ambiguousRune(r, confusables[0]); err != nil {
return err
}
confusables = confusables[1:]
case invisibleRuneType:
if sb.Len() > 0 {
if err := e.PassthroughHTMLStreamer.Text(sb.String()); err != nil {
return err
}
sb.Reset()
}
if err := e.invisibleRune(r); err != nil {
return err
}
default:
_, _ = sb.WriteRune(r)
}
}
return nil
}
func (e *escapeStreamer) brokenRune(bs []byte) error {
e.escaped.Escaped = true
e.escaped.HasBadRunes = true
if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
Key: "class",
Val: "broken-code-point",
}); err != nil {
return err
}
if err := e.PassthroughHTMLStreamer.Text(fmt.Sprintf("<%X>", bs)); err != nil {
return err
}
return e.PassthroughHTMLStreamer.EndTag("span")
}
func (e *escapeStreamer) ambiguousRune(r, c rune) error {
e.escaped.Escaped = true
e.escaped.HasAmbiguous = true
if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
Key: "class",
Val: "ambiguous-code-point tooltip",
}, html.Attribute{
Key: "data-content",
Val: e.locale.Tr("repo.ambiguous_character", r, c),
}); err != nil {
return err
}
if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
Key: "class",
Val: "char",
}); err != nil {
return err
}
if err := e.PassthroughHTMLStreamer.Text(string(r)); err != nil {
return err
}
if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil {
return err
}
return e.PassthroughHTMLStreamer.EndTag("span")
}
func (e *escapeStreamer) invisibleRune(r rune) error {
e.escaped.Escaped = true
e.escaped.HasInvisible = true
if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
Key: "class",
Val: "escaped-code-point",
}, html.Attribute{
Key: "data-escaped",
Val: fmt.Sprintf("[U+%04X]", r),
}); err != nil {
return err
}
if err := e.PassthroughHTMLStreamer.StartTag("span", html.Attribute{
Key: "class",
Val: "char",
}); err != nil {
return err
}
if err := e.PassthroughHTMLStreamer.Text(string(r)); err != nil {
return err
}
if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil {
return err
}
return e.PassthroughHTMLStreamer.EndTag("span")
}
type runeCountType struct {
numBasicRunes int
numNonConfusingNonBasicRunes int
numAmbiguousRunes int
numInvisibleRunes int
numBrokenRunes int
}
func (counts runeCountType) needsEscape() bool {
if counts.numBrokenRunes > 0 {
return true
}
if counts.numBasicRunes == 0 &&
counts.numNonConfusingNonBasicRunes > 0 {
return false
}
return counts.numAmbiguousRunes > 0 || counts.numInvisibleRunes > 0
}
type runeType int
const (
basicASCIIRuneType runeType = iota //nolint // <- This is technically deadcode but its self-documenting so it should stay
brokenRuneType
nonBasicASCIIRuneType
ambiguousRuneType
invisibleRuneType
)
func (e *escapeStreamer) runeTypes(runes ...rune) (types []runeType, confusables []rune, runeCounts runeCountType) {
types = make([]runeType, len(runes))
for i, r := range runes {
var confusable rune
switch {
case r == utf8.RuneError:
types[i] = brokenRuneType
runeCounts.numBrokenRunes++
case r == ' ' || r == '\t' || r == '\n':
runeCounts.numBasicRunes++
case e.isAllowed(r):
if r > 0x7e || r < 0x20 {
types[i] = nonBasicASCIIRuneType
runeCounts.numNonConfusingNonBasicRunes++
} else {
runeCounts.numBasicRunes++
}
case unicode.Is(InvisibleRanges, r):
types[i] = invisibleRuneType
runeCounts.numInvisibleRunes++
case unicode.IsControl(r):
types[i] = invisibleRuneType
runeCounts.numInvisibleRunes++
case isAmbiguous(r, &confusable, e.ambiguousTables...):
confusables = append(confusables, confusable)
types[i] = ambiguousRuneType
runeCounts.numAmbiguousRunes++
case r > 0x7e || r < 0x20:
types[i] = nonBasicASCIIRuneType
runeCounts.numNonConfusingNonBasicRunes++
default:
runeCounts.numBasicRunes++
}
}
return types, confusables, runeCounts
}
func (e *escapeStreamer) isAllowed(r rune) bool {
if len(e.allowed) == 0 {
return false
}
if len(e.allowed) == 1 {
return e.allowed[0] == r
}
return sort.Search(len(e.allowed), func(i int) bool {
return e.allowed[i] >= r
}) >= 0
}

View file

@ -8,6 +8,8 @@ import (
"reflect"
"strings"
"testing"
"code.gitea.io/gitea/modules/translation"
)
type escapeControlTest struct {
@ -25,37 +27,37 @@ var escapeControlTests = []escapeControlTest{
name: "single line western",
text: "single line western",
result: "single line western",
status: EscapeStatus{HasLTRScript: true},
status: EscapeStatus{},
},
{
name: "multi line western",
text: "single line western\nmulti line western\n",
result: "single line western\nmulti line western\n",
status: EscapeStatus{HasLTRScript: true},
status: EscapeStatus{},
},
{
name: "multi line western non-breaking space",
text: "single line western\nmulti line western\n",
result: `single line<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char"> </span></span>western` + "\n" + `multi line<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char"> </span></span>western` + "\n",
status: EscapeStatus{Escaped: true, HasLTRScript: true, HasSpaces: true},
status: EscapeStatus{Escaped: true, HasInvisible: true},
},
{
name: "mixed scripts: western + japanese",
text: "日属秘ぞしちゅ。Then some western.",
result: "日属秘ぞしちゅ。Then some western.",
status: EscapeStatus{HasLTRScript: true},
status: EscapeStatus{},
},
{
name: "japanese",
text: "日属秘ぞしちゅ。",
result: "日属秘ぞしちゅ。",
status: EscapeStatus{HasLTRScript: true},
status: EscapeStatus{},
},
{
name: "hebrew",
text: "עד תקופת יוון העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו'. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה",
result: "עד תקופת יוון העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו'. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה",
status: EscapeStatus{HasRTLScript: true},
result: `עד תקופת <span class="ambiguous-code-point tooltip" data-content="repo.ambiguous_character"><span class="char">י</span></span><span class="ambiguous-code-point tooltip" data-content="repo.ambiguous_character"><span class="char">ו</span></span><span class="ambiguous-code-point tooltip" data-content="repo.ambiguous_character"><span class="char">ו</span></span><span class="ambiguous-code-point tooltip" data-content="repo.ambiguous_character"><span class="char">ן</span></span> העתיקה היה העיסוק במתמטיקה תכליתי בלבד: היא שימשה כאוסף של נוסחאות לחישוב קרקע, אוכלוסין וכו&#39;. פריצת הדרך של היוונים, פרט לתרומותיהם הגדולות לידע המתמטי, הייתה בלימוד המתמטיקה כשלעצמה, מתוקף ערכה הרוחני. יחסם של חלק מהיוונים הקדמונים למתמטיקה היה דתי - למשל, הכת שאסף סביבו פיתגורס האמינה כי המתמטיקה היא הבסיס לכל הדברים. היוונים נחשבים ליוצרי מושג ההוכחה המתמטית, וכן לראשונים שעסקו במתמטיקה לשם עצמה, כלומר כתחום מחקרי עיוני ומופשט ולא רק כעזר שימושי. עם זאת, לצדה`,
status: EscapeStatus{Escaped: true, HasAmbiguous: true},
},
{
name: "more hebrew",
@ -64,12 +66,12 @@ var escapeControlTests = []escapeControlTest{
המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה"ס - 546 לפנה"ס בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף.
בשנים 582 לפנה"ס עד 496 לפנה"ס, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש"הכל מספר", או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים "חסרי מידה משותפת", ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג "תאיטיטוס" של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה"ס להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`,
result: `בתקופה מאוחרת יותר, השתמשו היוונים בשיטת סימון מתקדמת יותר, שבה הוצגו המספרים לפי 22 אותיות האלפבית היווני. לסימון המספרים בין 1 ל-9 נקבעו תשע האותיות הראשונות, בתוספת גרש ( ' ) בצד ימין של האות, למעלה; תשע האותיות הבאות ייצגו את העשרות מ-10 עד 90, והבאות את המאות. לסימון הספרות בין 1000 ל-900,000, השתמשו היוונים באותן אותיות, אך הוסיפו לאותיות את הגרש דווקא מצד שמאל של האותיות, למטה. ממיליון ומעלה, כנראה השתמשו היוונים בשני תגים במקום אחד.
result: `בתקופה מאוחרת יותר, השתמשו היוונים בשיטת סימון מתקדמת יותר, שבה הוצגו המספרים לפי 22 אותיות האלפבית היווני. לסימון המספרים בין 1 ל-9 נקבעו תשע האותיות הראשונות, בתוספת גרש ( &#39; ) בצד ימין של האות, למעלה; תשע האותיות הבאות ייצגו את העשרות מ-10 עד 90, והבאות את המאות. לסימון הספרות בין 1000 ל-900,000, השתמשו היוונים באותן אותיות, אך הוסיפו לאותיות את הגרש דווקא מצד שמאל של האותיות, למטה. ממיליון ומעלה, כנראה השתמשו היוונים בשני תגים במקום אחד.
המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה"ס - 546 לפנה"ס בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף.
המתמטיקאי הבולט הראשון ביוון העתיקה, ויש האומרים בתולדות האנושות, הוא תאלס (624 לפנה&#34;<span class="ambiguous-code-point tooltip" data-content="repo.ambiguous_character"><span class="char">ס</span></span> - 546 לפנה&#34;<span class="ambiguous-code-point tooltip" data-content="repo.ambiguous_character"><span class="char">ס</span></span> בקירוב).[1] לא יהיה זה משולל יסוד להניח שהוא האדם הראשון שהוכיח משפט מתמטי, ולא רק גילה אותו. תאלס הוכיח שישרים מקבילים חותכים מצד אחד של שוקי זווית קטעים בעלי יחסים שווים (משפט תאלס הראשון), שהזווית המונחת על קוטר במעגל היא זווית ישרה (משפט תאלס השני), שהקוטר מחלק את המעגל לשני חלקים שווים, ושזוויות הבסיס במשולש שווה-שוקיים שוות זו לזו. מיוחסות לו גם שיטות למדידת גובהן של הפירמידות בעזרת מדידת צילן ולקביעת מיקומה של ספינה הנראית מן החוף.
בשנים 582 לפנה"ס עד 496 לפנה"ס, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש"הכל מספר", או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים "חסרי מידה משותפת", ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג "תאיטיטוס" של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה"ס להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`,
status: EscapeStatus{HasRTLScript: true},
בשנים 582 לפנה&#34;<span class="ambiguous-code-point tooltip" data-content="repo.ambiguous_character"><span class="char">ס</span></span> עד 496 לפנה&#34;<span class="ambiguous-code-point tooltip" data-content="repo.ambiguous_character"><span class="char">ס</span></span>, בקירוב, חי מתמטיקאי חשוב במיוחד - פיתגורס. המקורות הראשוניים עליו מועטים, וההיסטוריונים מתקשים להפריד את העובדות משכבת המסתורין והאגדות שנקשרו בו. ידוע שסביבו התקבצה האסכולה הפיתגוראית מעין כת פסבדו-מתמטית שהאמינה ש&#34;הכל מספר&#34;, או ליתר דיוק הכל ניתן לכימות, וייחסה למספרים משמעויות מיסטיות. ככל הנראה הפיתגוראים ידעו לבנות את הגופים האפלטוניים, הכירו את הממוצע האריתמטי, הממוצע הגאומטרי והממוצע ההרמוני והגיעו להישגים חשובים נוספים. ניתן לומר שהפיתגוראים גילו את היותו של השורש הריבועי של 2, שהוא גם האלכסון בריבוע שאורך צלעותיו 1, אי רציונלי, אך תגליתם הייתה למעשה רק שהקטעים &#34;חסרי מידה משותפת&#34;, ומושג המספר האי רציונלי מאוחר יותר.[2] אזכור ראשון לקיומם של קטעים חסרי מידה משותפת מופיע בדיאלוג &#34;תאיטיטוס&#34; של אפלטון, אך רעיון זה היה מוכר עוד קודם לכן, במאה החמישית לפנה&#34;<span class="ambiguous-code-point tooltip" data-content="repo.ambiguous_character"><span class="char">ס</span></span> להיפאסוס, בן האסכולה הפיתגוראית, ואולי לפיתגורס עצמו.[3]`,
status: EscapeStatus{Escaped: true, HasAmbiguous: true},
},
{
name: "Mixed RTL+LTR",
@ -79,10 +81,7 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`,
result: `Many computer programs fail to display bidirectional text correctly.
For example, the Hebrew name Sarah (שרה) is spelled: sin (ש) (which appears rightmost),
then resh (ר), and finally heh (ה) (which should appear leftmost).`,
status: EscapeStatus{
HasRTLScript: true,
HasLTRScript: true,
},
status: EscapeStatus{},
},
{
name: "Mixed RTL+LTR+BIDI",
@ -90,32 +89,27 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`,
For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" +
`sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`,
result: `Many computer programs fail to display bidirectional text correctly.
For example, the Hebrew name Sarah <span class="escaped-code-point" data-escaped="[U+2067]"><span class="char">` + "\u2067" + `</span></span>שרה<span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>` + "\n" +
For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" +
`sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).`,
status: EscapeStatus{
Escaped: true,
HasBIDI: true,
HasRTLScript: true,
HasLTRScript: true,
},
status: EscapeStatus{},
},
{
name: "Accented characters",
text: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}),
result: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}),
status: EscapeStatus{HasLTRScript: true},
status: EscapeStatus{},
},
{
name: "Program",
text: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})",
result: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})",
status: EscapeStatus{HasLTRScript: true},
status: EscapeStatus{},
},
{
name: "CVE testcase",
text: "if access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {",
result: `if access_level != "user<span class="escaped-code-point" data-escaped="[U+202E]"><span class="char">` + "\u202e" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>// Check if admin<span class="escaped-code-point" data-escaped="[U+2069]"><span class="char">` + "\u2069" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>" {`,
status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true},
result: `if access_level != &#34;user<span class="escaped-code-point" data-escaped="[U+202E]"><span class="char">` + "\u202e" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>// Check if admin<span class="escaped-code-point" data-escaped="[U+2069]"><span class="char">` + "\u2069" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>&#34; {`,
status: EscapeStatus{Escaped: true, HasInvisible: true},
},
{
name: "Mixed testcase with fail",
@ -124,10 +118,10 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`,
`sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).` +
"\nif access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {\n",
result: `Many computer programs fail to display bidirectional text correctly.
For example, the Hebrew name Sarah <span class="escaped-code-point" data-escaped="[U+2067]"><span class="char">` + "\u2067" + `</span></span>שרה<span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>` + "\n" +
For example, the Hebrew name Sarah ` + "\u2067" + `שרה` + "\u2066\n" +
`sin (ש) (which appears rightmost), then resh (ר), and finally heh (ה) (which should appear leftmost).` +
"\n" + `if access_level != "user<span class="escaped-code-point" data-escaped="[U+202E]"><span class="char">` + "\u202e" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>// Check if admin<span class="escaped-code-point" data-escaped="[U+2069]"><span class="char">` + "\u2069" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>" {` + "\n",
status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true, HasRTLScript: true},
"\n" + `if access_level != &#34;user<span class="escaped-code-point" data-escaped="[U+202E]"><span class="char">` + "\u202e" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>// Check if admin<span class="escaped-code-point" data-escaped="[U+2069]"><span class="char">` + "\u2069" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>&#34; {` + "\n",
status: EscapeStatus{Escaped: true, HasInvisible: true},
},
{
// UTF-8/16/32 all use the same codepoint for BOM
@ -135,15 +129,16 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`,
name: "UTF BOM",
text: "\xef\xbb\xbftest",
result: "\xef\xbb\xbftest",
status: EscapeStatus{HasLTRScript: true},
status: EscapeStatus{},
},
}
func TestEscapeControlString(t *testing.T) {
for _, tt := range escapeControlTests {
t.Run(tt.name, func(t *testing.T) {
status, result := EscapeControlString(tt.text)
if !reflect.DeepEqual(status, tt.status) {
locale := translation.NewLocale("en_US")
status, result := EscapeControlString(tt.text, locale)
if !reflect.DeepEqual(*status, tt.status) {
t.Errorf("EscapeControlString() status = %v, wanted= %v", status, tt.status)
}
if result != tt.result {
@ -153,20 +148,6 @@ func TestEscapeControlString(t *testing.T) {
}
}
func TestEscapeControlBytes(t *testing.T) {
for _, tt := range escapeControlTests {
t.Run(tt.name, func(t *testing.T) {
status, result := EscapeControlBytes([]byte(tt.text))
if !reflect.DeepEqual(status, tt.status) {
t.Errorf("EscapeControlBytes() status = %v, wanted= %v", status, tt.status)
}
if string(result) != tt.result {
t.Errorf("EscapeControlBytes()\nresult= %v,\nwanted= %v", result, tt.result)
}
})
}
}
func TestEscapeControlReader(t *testing.T) {
// lets add some control characters to the tests
tests := make([]escapeControlTest, 0, len(escapeControlTests)*3)
@ -184,16 +165,7 @@ func TestEscapeControlReader(t *testing.T) {
test.text = addPrefix("\u001E", test.text)
test.result = addPrefix(`<span class="escaped-code-point" data-escaped="[U+001E]"><span class="char">`+"\u001e"+`</span></span>`, test.result)
test.status.Escaped = true
test.status.HasControls = true
tests = append(tests, test)
}
for _, test := range escapeControlTests {
test.name += " (+Mark)"
test.text = addPrefix("\u0300", test.text)
test.result = addPrefix(`<span class="escaped-code-point" data-escaped="[U+0300]"><span class="char">`+"\u0300"+`</span></span>`, test.result)
test.status.Escaped = true
test.status.HasMarks = true
test.status.HasInvisible = true
tests = append(tests, test)
}
@ -201,13 +173,13 @@ func TestEscapeControlReader(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
input := strings.NewReader(tt.text)
output := &strings.Builder{}
status, err := EscapeControlReader(input, output)
status, err := EscapeControlReader(input, output, translation.NewLocale("en_US"))
result := output.String()
if err != nil {
t.Errorf("EscapeControlReader(): err = %v", err)
}
if !reflect.DeepEqual(status, tt.status) {
if !reflect.DeepEqual(*status, tt.status) {
t.Errorf("EscapeControlReader() status = %v, wanted= %v", status, tt.status)
}
if result != tt.result {
@ -223,5 +195,5 @@ func TestEscapeControlReader_panic(t *testing.T) {
for i := 0; i < 6826; i++ {
bs = append(bs, []byte("—")...)
}
_, _ = EscapeControlBytes(bs)
_, _ = EscapeControlString(string(bs), translation.NewLocale("en_US"))
}

View file

@ -0,0 +1,201 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package charset
import (
"fmt"
"io"
"golang.org/x/net/html"
)
// HTMLStreamer represents a SAX-like interface for HTML
type HTMLStreamer interface {
Error(err error) error
Doctype(data string) error
Comment(data string) error
StartTag(data string, attrs ...html.Attribute) error
SelfClosingTag(data string, attrs ...html.Attribute) error
EndTag(data string) error
Text(data string) error
}
// PassthroughHTMLStreamer is a passthrough streamer
type PassthroughHTMLStreamer struct {
next HTMLStreamer
}
func NewPassthroughStreamer(next HTMLStreamer) *PassthroughHTMLStreamer {
return &PassthroughHTMLStreamer{next: next}
}
var _ (HTMLStreamer) = &PassthroughHTMLStreamer{}
// Error tells the next streamer in line that there is an error
func (p *PassthroughHTMLStreamer) Error(err error) error {
return p.next.Error(err)
}
// Doctype tells the next streamer what the doctype is
func (p *PassthroughHTMLStreamer) Doctype(data string) error {
return p.next.Doctype(data)
}
// Comment tells the next streamer there is a comment
func (p *PassthroughHTMLStreamer) Comment(data string) error {
return p.next.Comment(data)
}
// StartTag tells the next streamer there is a starting tag
func (p *PassthroughHTMLStreamer) StartTag(data string, attrs ...html.Attribute) error {
return p.next.StartTag(data, attrs...)
}
// SelfClosingTag tells the next streamer there is a self-closing tag
func (p *PassthroughHTMLStreamer) SelfClosingTag(data string, attrs ...html.Attribute) error {
return p.next.SelfClosingTag(data, attrs...)
}
// EndTag tells the next streamer there is a end tag
func (p *PassthroughHTMLStreamer) EndTag(data string) error {
return p.next.EndTag(data)
}
// Text tells the next streamer there is a text
func (p *PassthroughHTMLStreamer) Text(data string) error {
return p.next.Text(data)
}
// HTMLStreamWriter acts as a writing sink
type HTMLStreamerWriter struct {
io.Writer
err error
}
// Write implements io.Writer
func (h *HTMLStreamerWriter) Write(data []byte) (int, error) {
if h.err != nil {
return 0, h.err
}
return h.Writer.Write(data)
}
// Write implements io.StringWriter
func (h *HTMLStreamerWriter) WriteString(data string) (int, error) {
if h.err != nil {
return 0, h.err
}
return h.Writer.Write([]byte(data))
}
// Error tells the next streamer in line that there is an error
func (h *HTMLStreamerWriter) Error(err error) error {
if h.err == nil {
h.err = err
}
return h.err
}
// Doctype tells the next streamer what the doctype is
func (h *HTMLStreamerWriter) Doctype(data string) error {
_, h.err = h.WriteString("<!DOCTYPE " + data + ">")
return h.err
}
// Comment tells the next streamer there is a comment
func (h *HTMLStreamerWriter) Comment(data string) error {
_, h.err = h.WriteString("<!--" + data + "-->")
return h.err
}
// StartTag tells the next streamer there is a starting tag
func (h *HTMLStreamerWriter) StartTag(data string, attrs ...html.Attribute) error {
return h.startTag(data, attrs, false)
}
// SelfClosingTag tells the next streamer there is a self-closing tag
func (h *HTMLStreamerWriter) SelfClosingTag(data string, attrs ...html.Attribute) error {
return h.startTag(data, attrs, true)
}
func (h *HTMLStreamerWriter) startTag(data string, attrs []html.Attribute, selfclosing bool) error {
if _, h.err = h.WriteString("<" + data); h.err != nil {
return h.err
}
for _, attr := range attrs {
if _, h.err = h.WriteString(" " + attr.Key + "=\"" + html.EscapeString(attr.Val) + "\""); h.err != nil {
return h.err
}
}
if selfclosing {
if _, h.err = h.WriteString("/>"); h.err != nil {
return h.err
}
} else {
if _, h.err = h.WriteString(">"); h.err != nil {
return h.err
}
}
return h.err
}
// EndTag tells the next streamer there is a end tag
func (h *HTMLStreamerWriter) EndTag(data string) error {
_, h.err = h.WriteString("</" + data + ">")
return h.err
}
// Text tells the next streamer there is a text
func (h *HTMLStreamerWriter) Text(data string) error {
_, h.err = h.WriteString(html.EscapeString(data))
return h.err
}
// StreamHTML streams an html to a provided streamer
func StreamHTML(source io.Reader, streamer HTMLStreamer) error {
tokenizer := html.NewTokenizer(source)
for {
tt := tokenizer.Next()
switch tt {
case html.ErrorToken:
if tokenizer.Err() != io.EOF {
return tokenizer.Err()
}
return nil
case html.DoctypeToken:
token := tokenizer.Token()
if err := streamer.Doctype(token.Data); err != nil {
return err
}
case html.CommentToken:
token := tokenizer.Token()
if err := streamer.Comment(token.Data); err != nil {
return err
}
case html.StartTagToken:
token := tokenizer.Token()
if err := streamer.StartTag(token.Data, token.Attr...); err != nil {
return err
}
case html.SelfClosingTagToken:
token := tokenizer.Token()
if err := streamer.StartTag(token.Data, token.Attr...); err != nil {
return err
}
case html.EndTagToken:
token := tokenizer.Token()
if err := streamer.EndTag(token.Data); err != nil {
return err
}
case html.TextToken:
token := tokenizer.Token()
if err := streamer.Text(token.Data); err != nil {
return err
}
default:
return fmt.Errorf("unknown type of token: %d", tt)
}
}
}

View file

@ -0,0 +1,111 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
import (
"bytes"
"flag"
"fmt"
"go/format"
"os"
"text/template"
"golang.org/x/text/unicode/rangetable"
)
// InvisibleRunes these are runes that vscode has assigned to be invisible
// See https://github.com/hediet/vscode-unicode-data
var InvisibleRunes = []rune{
9, 10, 11, 12, 13, 32, 127, 160, 173, 847, 1564, 4447, 4448, 6068, 6069, 6155, 6156, 6157, 6158, 7355, 7356, 8192, 8193, 8194, 8195, 8196, 8197, 8198, 8199, 8200, 8201, 8202, 8203, 8204, 8205, 8206, 8207, 8234, 8235, 8236, 8237, 8238, 8239, 8287, 8288, 8289, 8290, 8291, 8292, 8293, 8294, 8295, 8296, 8297, 8298, 8299, 8300, 8301, 8302, 8303, 10240, 12288, 12644, 65024, 65025, 65026, 65027, 65028, 65029, 65030, 65031, 65032, 65033, 65034, 65035, 65036, 65037, 65038, 65039, 65279, 65440, 65520, 65521, 65522, 65523, 65524, 65525, 65526, 65527, 65528, 65532, 78844, 119155, 119156, 119157, 119158, 119159, 119160, 119161, 119162, 917504, 917505, 917506, 917507, 917508, 917509, 917510, 917511, 917512, 917513, 917514, 917515, 917516, 917517, 917518, 917519, 917520, 917521, 917522, 917523, 917524, 917525, 917526, 917527, 917528, 917529, 917530, 917531, 917532, 917533, 917534, 917535, 917536, 917537, 917538, 917539, 917540, 917541, 917542, 917543, 917544, 917545, 917546, 917547, 917548, 917549, 917550, 917551, 917552, 917553, 917554, 917555, 917556, 917557, 917558, 917559, 917560, 917561, 917562, 917563, 917564, 917565, 917566, 917567, 917568, 917569, 917570, 917571, 917572, 917573, 917574, 917575, 917576, 917577, 917578, 917579, 917580, 917581, 917582, 917583, 917584, 917585, 917586, 917587, 917588, 917589, 917590, 917591, 917592, 917593, 917594, 917595, 917596, 917597, 917598, 917599, 917600, 917601, 917602, 917603, 917604, 917605, 917606, 917607, 917608, 917609, 917610, 917611, 917612, 917613, 917614, 917615, 917616, 917617, 917618, 917619, 917620, 917621, 917622, 917623, 917624, 917625, 917626, 917627, 917628, 917629, 917630, 917631, 917760, 917761, 917762, 917763, 917764, 917765, 917766, 917767, 917768, 917769, 917770, 917771, 917772, 917773, 917774, 917775, 917776, 917777, 917778, 917779, 917780, 917781, 917782, 917783, 917784, 917785, 917786, 917787, 917788, 917789, 917790, 917791, 917792, 917793, 917794, 917795, 917796, 917797, 917798, 917799, 917800, 917801, 917802, 917803, 917804, 917805, 917806, 917807, 917808, 917809, 917810, 917811, 917812, 917813, 917814, 917815, 917816, 917817, 917818, 917819, 917820, 917821, 917822, 917823, 917824, 917825, 917826, 917827, 917828, 917829, 917830, 917831, 917832, 917833, 917834, 917835, 917836, 917837, 917838, 917839, 917840, 917841, 917842, 917843, 917844, 917845, 917846, 917847, 917848, 917849, 917850, 917851, 917852, 917853, 917854, 917855, 917856, 917857, 917858, 917859, 917860, 917861, 917862, 917863, 917864, 917865, 917866, 917867, 917868, 917869, 917870, 917871, 917872, 917873, 917874, 917875, 917876, 917877, 917878, 917879, 917880, 917881, 917882, 917883, 917884, 917885, 917886, 917887, 917888, 917889, 917890, 917891, 917892, 917893, 917894, 917895, 917896, 917897, 917898, 917899, 917900, 917901, 917902, 917903, 917904, 917905, 917906, 917907, 917908, 917909, 917910, 917911, 917912, 917913, 917914, 917915, 917916, 917917, 917918, 917919, 917920, 917921, 917922, 917923, 917924, 917925, 917926, 917927, 917928, 917929, 917930, 917931, 917932, 917933, 917934, 917935, 917936, 917937, 917938, 917939, 917940, 917941, 917942, 917943, 917944, 917945, 917946, 917947, 917948, 917949, 917950, 917951, 917952, 917953, 917954, 917955, 917956, 917957, 917958, 917959, 917960, 917961, 917962, 917963, 917964, 917965, 917966, 917967, 917968, 917969, 917970, 917971, 917972, 917973, 917974, 917975, 917976, 917977, 917978, 917979, 917980, 917981, 917982, 917983, 917984, 917985, 917986, 917987, 917988, 917989, 917990, 917991, 917992, 917993, 917994, 917995, 917996, 917997, 917998, 917999,
}
var verbose bool
func main() {
flag.Usage = func() {
fmt.Fprintf(os.Stderr, `%s: Generate InvisibleRunesRange
Usage: %[1]s [-v] [-o output.go]
`, os.Args[0])
flag.PrintDefaults()
}
output := ""
flag.BoolVar(&verbose, "v", false, "verbose output")
flag.StringVar(&output, "o", "invisible_gen.go", "file to output to")
flag.Parse()
// First we filter the runes to remove
// <space><tab><newline>
filtered := make([]rune, 0, len(InvisibleRunes))
for _, r := range InvisibleRunes {
if r == ' ' || r == '\t' || r == '\n' {
continue
}
filtered = append(filtered, r)
}
table := rangetable.New(filtered...)
if err := runTemplate(generatorTemplate, output, table); err != nil {
fatalf("Unable to run template: %v", err)
}
}
func runTemplate(t *template.Template, filename string, data interface{}) error {
buf := bytes.NewBuffer(nil)
if err := t.Execute(buf, data); err != nil {
return fmt.Errorf("unable to execute template: %w", err)
}
bs, err := format.Source(buf.Bytes())
if err != nil {
verbosef("Bad source:\n%s", buf.String())
return fmt.Errorf("unable to format source: %w", err)
}
file, err := os.Create(filename)
if err != nil {
return fmt.Errorf("failed to create file %s because %w", filename, err)
}
defer file.Close()
_, err = file.Write(bs)
if err != nil {
return fmt.Errorf("unable to write generated source: %w", err)
}
return nil
}
var generatorTemplate = template.Must(template.New("invisibleTemplate").Parse(`// This file is generated by modules/charset/invisible/generate.go DO NOT EDIT
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package charset
import "unicode"
var InvisibleRanges = &unicode.RangeTable{
R16: []unicode.Range16{
{{range .R16 }} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}},
{{end}} },
R32: []unicode.Range32{
{{range .R32}} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}},
{{end}} },
LatinOffset: {{.LatinOffset}},
}
`))
func logf(format string, args ...interface{}) {
fmt.Fprintf(os.Stderr, format+"\n", args...)
}
func verbosef(format string, args ...interface{}) {
if verbose {
logf(format, args...)
}
}
func fatalf(format string, args ...interface{}) {
logf("fatal: "+format+"\n", args...)
os.Exit(1)
}

View file

@ -0,0 +1,37 @@
// This file is generated by modules/charset/invisible/generate.go DO NOT EDIT
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package charset
import "unicode"
var InvisibleRanges = &unicode.RangeTable{
R16: []unicode.Range16{
{Lo: 11, Hi: 13, Stride: 1},
{Lo: 127, Hi: 160, Stride: 33},
{Lo: 173, Hi: 847, Stride: 674},
{Lo: 1564, Hi: 4447, Stride: 2883},
{Lo: 4448, Hi: 6068, Stride: 1620},
{Lo: 6069, Hi: 6155, Stride: 86},
{Lo: 6156, Hi: 6158, Stride: 1},
{Lo: 7355, Hi: 7356, Stride: 1},
{Lo: 8192, Hi: 8207, Stride: 1},
{Lo: 8234, Hi: 8239, Stride: 1},
{Lo: 8287, Hi: 8303, Stride: 1},
{Lo: 10240, Hi: 12288, Stride: 2048},
{Lo: 12644, Hi: 65024, Stride: 52380},
{Lo: 65025, Hi: 65039, Stride: 1},
{Lo: 65279, Hi: 65440, Stride: 161},
{Lo: 65520, Hi: 65528, Stride: 1},
{Lo: 65532, Hi: 65532, Stride: 1},
},
R32: []unicode.Range32{
{Lo: 78844, Hi: 119155, Stride: 40311},
{Lo: 119156, Hi: 119162, Stride: 1},
{Lo: 917504, Hi: 917631, Stride: 1},
{Lo: 917760, Hi: 917999, Stride: 1},
},
LatinOffset: 2,
}

View file

@ -1035,13 +1035,13 @@ file_view_rendered = View Rendered
file_view_raw = View Raw
file_permalink = Permalink
file_too_large = The file is too large to be shown.
bidi_bad_header = `This file contains unexpected Bidirectional Unicode characters!`
bidi_bad_description = `This file contains unexpected Bidirectional Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.`
bidi_bad_description_escaped = `This file contains unexpected Bidirectional Unicode characters. Hidden unicode characters are escaped below. Use the Unescape button to show how they render.`
unicode_header = `This file contains hidden Unicode characters!`
unicode_description = `This file contains hidden Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.`
unicode_description_escaped = `This file contains hidden Unicode characters. Hidden unicode characters are escaped below. Use the Unescape button to show how they render.`
line_unicode = `This line has hidden unicode characters`
invisible_runes_header = `This file contains invisible Unicode characters!`
invisible_runes_description = `This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.`
ambiguous_runes_header = `This file contains ambiguous Unicode characters!`
ambiguous_runes_description = `This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.`
invisible_runes_line = `This line has invisible unicode characters`
ambiguous_runes_line = `This line has ambiguous unicode characters`
ambiguous_character = `%[1]c [U+%04[1]X] is confusable with %[2]c [U+%04[2]X]`
escape_control_characters = Escape
unescape_control_characters = Unescape

View file

@ -40,7 +40,7 @@ type blameRow struct {
CommitMessage string
CommitSince gotemplate.HTML
Code gotemplate.HTML
EscapeStatus charset.EscapeStatus
EscapeStatus *charset.EscapeStatus
}
// RefBlame render blame page
@ -235,7 +235,7 @@ func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames m
}
lines := make([]string, 0)
rows := make([]*blameRow, 0)
escapeStatus := charset.EscapeStatus{}
escapeStatus := &charset.EscapeStatus{}
i := 0
commitCnt := 0
@ -280,7 +280,7 @@ func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames m
fileName := fmt.Sprintf("%v", ctx.Data["FileName"])
line = highlight.Code(fileName, language, line)
br.EscapeStatus, line = charset.EscapeControlString(line)
br.EscapeStatus, line = charset.EscapeControlHTML(line, ctx.Locale)
br.Code = gotemplate.HTML(line)
rows = append(rows, br)
escapeStatus = escapeStatus.Or(br.EscapeStatus)

View file

@ -309,7 +309,7 @@ func LFSFileGet(ctx *context.Context) {
// Building code view blocks with line number on server side.
escapedContent := &bytes.Buffer{}
ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, escapedContent)
ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, escapedContent, ctx.Locale)
var output bytes.Buffer
lines := strings.Split(escapedContent.String(), "\n")

View file

@ -328,35 +328,31 @@ func renderReadmeFile(ctx *context.Context, readmeFile *namedBlob, readmeTreelin
if markupType := markup.Type(readmeFile.name); markupType != "" {
ctx.Data["IsMarkup"] = true
ctx.Data["MarkupType"] = markupType
var result strings.Builder
err := markup.Render(&markup.RenderContext{
ctx.Data["EscapeStatus"], ctx.Data["FileContent"], err = markupRender(ctx, &markup.RenderContext{
Ctx: ctx,
RelativePath: path.Join(ctx.Repo.TreePath, readmeFile.name), // ctx.Repo.TreePath is the directory not the Readme so we must append the Readme filename (and path).
URLPrefix: readmeTreelink,
Metas: ctx.Repo.Repository.ComposeDocumentMetas(),
GitRepo: ctx.Repo.GitRepo,
}, rd, &result)
}, rd)
if err != nil {
log.Error("Render failed: %v then fallback", err)
log.Error("Render failed for %s in %-v: %v Falling back to rendering source", readmeFile.name, ctx.Repo.Repository, err)
buf := &bytes.Buffer{}
ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, buf)
ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, buf, ctx.Locale)
ctx.Data["FileContent"] = strings.ReplaceAll(
gotemplate.HTMLEscapeString(buf.String()), "\n", `<br>`,
)
} else {
ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlString(result.String())
}
} else {
ctx.Data["IsRenderedHTML"] = true
buf := &bytes.Buffer{}
ctx.Data["EscapeStatus"], err = charset.EscapeControlReader(rd, buf)
ctx.Data["EscapeStatus"], err = charset.EscapeControlReader(rd, &charset.BreakWriter{Writer: buf}, ctx.Locale, charset.RuneNBSP)
if err != nil {
log.Error("Read failed: %v", err)
}
ctx.Data["FileContent"] = strings.ReplaceAll(
gotemplate.HTMLEscapeString(buf.String()), "\n", `<br>`,
)
ctx.Data["FileContent"] = buf.String()
}
}
@ -498,32 +494,30 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
if markupType != "" && !shouldRenderSource {
ctx.Data["IsMarkup"] = true
ctx.Data["MarkupType"] = markupType
var result strings.Builder
if !detected {
markupType = ""
}
metas := ctx.Repo.Repository.ComposeDocumentMetas()
metas["BranchNameSubURL"] = ctx.Repo.BranchNameSubURL()
err := markup.Render(&markup.RenderContext{
ctx.Data["EscapeStatus"], ctx.Data["FileContent"], err = markupRender(ctx, &markup.RenderContext{
Ctx: ctx,
Type: markupType,
RelativePath: ctx.Repo.TreePath,
URLPrefix: path.Dir(treeLink),
Metas: metas,
GitRepo: ctx.Repo.GitRepo,
}, rd, &result)
}, rd)
if err != nil {
ctx.ServerError("Render", err)
return
}
// to prevent iframe load third-party url
ctx.Resp.Header().Add("Content-Security-Policy", "frame-src 'self'")
ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlString(result.String())
} else if readmeExist && !shouldRenderSource {
buf := &bytes.Buffer{}
ctx.Data["IsRenderedHTML"] = true
ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, buf)
ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, buf, ctx.Locale)
ctx.Data["FileContent"] = strings.ReplaceAll(
gotemplate.HTMLEscapeString(buf.String()), "\n", `<br>`,
@ -570,12 +564,13 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
log.Error("highlight.File failed, fallback to plain text: %v", err)
fileContent = highlight.PlainText(buf)
}
status, _ := charset.EscapeControlReader(bytes.NewReader(buf), io.Discard)
ctx.Data["EscapeStatus"] = status
statuses := make([]charset.EscapeStatus, len(fileContent))
status := &charset.EscapeStatus{}
statuses := make([]*charset.EscapeStatus, len(fileContent))
for i, line := range fileContent {
statuses[i], fileContent[i] = charset.EscapeControlString(line)
statuses[i], fileContent[i] = charset.EscapeControlHTML(line, ctx.Locale)
status = status.Or(statuses[i])
}
ctx.Data["EscapeStatus"] = status
ctx.Data["FileContent"] = fileContent
ctx.Data["LineEscapeStatus"] = statuses
}
@ -613,20 +608,17 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
rd := io.MultiReader(bytes.NewReader(buf), dataRc)
ctx.Data["IsMarkup"] = true
ctx.Data["MarkupType"] = markupType
var result strings.Builder
err := markup.Render(&markup.RenderContext{
ctx.Data["EscapeStatus"], ctx.Data["FileContent"], err = markupRender(ctx, &markup.RenderContext{
Ctx: ctx,
RelativePath: ctx.Repo.TreePath,
URLPrefix: path.Dir(treeLink),
Metas: ctx.Repo.Repository.ComposeDocumentMetas(),
GitRepo: ctx.Repo.GitRepo,
}, rd, &result)
}, rd)
if err != nil {
ctx.ServerError("Render", err)
return
}
ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlString(result.String())
}
}
@ -645,6 +637,23 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
}
}
func markupRender(ctx *context.Context, renderCtx *markup.RenderContext, input io.Reader) (escaped *charset.EscapeStatus, output string, err error) {
markupRd, markupWr := io.Pipe()
defer markupWr.Close()
done := make(chan struct{})
go func() {
sb := &strings.Builder{}
// We allow NBSP here this is rendered
escaped, _ = charset.EscapeControlReader(markupRd, sb, ctx.Locale, charset.RuneNBSP)
output = sb.String()
close(done)
}()
err = markup.Render(renderCtx, input, markupWr)
_ = markupWr.CloseWithError(err)
<-done
return escaped, output, err
}
func safeURL(address string) string {
u, err := url.Parse(address)
if err != nil {

View file

@ -239,9 +239,28 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) {
Metas: ctx.Repo.Repository.ComposeDocumentMetas(),
IsWiki: true,
}
buf := &strings.Builder{}
var buf strings.Builder
if err := markdown.Render(rctx, bytes.NewReader(data), &buf); err != nil {
renderFn := func(data []byte) (escaped *charset.EscapeStatus, output string, err error) {
markupRd, markupWr := io.Pipe()
defer markupWr.Close()
done := make(chan struct{})
go func() {
// We allow NBSP here this is rendered
escaped, _ = charset.EscapeControlReader(markupRd, buf, ctx.Locale, charset.RuneNBSP)
output = buf.String()
buf.Reset()
close(done)
}()
err = markdown.Render(rctx, bytes.NewReader(data), markupWr)
_ = markupWr.CloseWithError(err)
<-done
return escaped, output, err
}
ctx.Data["EscapeStatus"], ctx.Data["content"], err = renderFn(data)
if err != nil {
if wikiRepo != nil {
wikiRepo.Close()
}
@ -249,11 +268,10 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) {
return nil, nil
}
ctx.Data["EscapeStatus"], ctx.Data["content"] = charset.EscapeControlString(buf.String())
if !isSideBar {
buf.Reset()
if err := markdown.Render(rctx, bytes.NewReader(sidebarContent), &buf); err != nil {
ctx.Data["sidebarEscapeStatus"], ctx.Data["sidebarContent"], err = renderFn(sidebarContent)
if err != nil {
if wikiRepo != nil {
wikiRepo.Close()
}
@ -261,14 +279,14 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) {
return nil, nil
}
ctx.Data["sidebarPresent"] = sidebarContent != nil
ctx.Data["sidebarEscapeStatus"], ctx.Data["sidebarContent"] = charset.EscapeControlString(buf.String())
} else {
ctx.Data["sidebarPresent"] = false
}
if !isFooter {
buf.Reset()
if err := markdown.Render(rctx, bytes.NewReader(footerContent), &buf); err != nil {
ctx.Data["footerEscapeStatus"], ctx.Data["footerContent"], err = renderFn(footerContent)
if err != nil {
if wikiRepo != nil {
wikiRepo.Close()
}
@ -276,7 +294,6 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) {
return nil, nil
}
ctx.Data["footerPresent"] = footerContent != nil
ctx.Data["footerEscapeStatus"], ctx.Data["footerContent"] = charset.EscapeControlString(buf.String())
} else {
ctx.Data["footerPresent"] = false
}

View file

@ -32,6 +32,7 @@ import (
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/translation"
"github.com/sergi/go-diff/diffmatchpatch"
stdcharset "golang.org/x/net/html/charset"
@ -169,11 +170,11 @@ func getDiffLineSectionInfo(treePath, line string, lastLeftIdx, lastRightIdx int
}
// escape a line's content or return <br> needed for copy/paste purposes
func getLineContent(content string) DiffInline {
func getLineContent(content string, locale translation.Locale) DiffInline {
if len(content) > 0 {
return DiffInlineWithUnicodeEscape(template.HTML(html.EscapeString(content)))
return DiffInlineWithUnicodeEscape(template.HTML(html.EscapeString(content)), locale)
}
return DiffInline{Content: "<br>"}
return DiffInline{EscapeStatus: &charset.EscapeStatus{}, Content: "<br>"}
}
// DiffSection represents a section of a DiffFile.
@ -267,26 +268,26 @@ func init() {
// DiffInline is a struct that has a content and escape status
type DiffInline struct {
EscapeStatus charset.EscapeStatus
EscapeStatus *charset.EscapeStatus
Content template.HTML
}
// DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped
func DiffInlineWithUnicodeEscape(s template.HTML) DiffInline {
status, content := charset.EscapeControlString(string(s))
func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline {
status, content := charset.EscapeControlHTML(string(s), locale)
return DiffInline{EscapeStatus: status, Content: template.HTML(content)}
}
// DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped
func DiffInlineWithHighlightCode(fileName, language, code string) DiffInline {
status, content := charset.EscapeControlString(highlight.Code(fileName, language, code))
func DiffInlineWithHighlightCode(fileName, language, code string, locale translation.Locale) DiffInline {
status, content := charset.EscapeControlHTML(highlight.Code(fileName, language, code), locale)
return DiffInline{EscapeStatus: status, Content: template.HTML(content)}
}
// GetComputedInlineDiffFor computes inline diff for the given line.
func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) DiffInline {
func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline {
if setting.Git.DisableDiffHighlight {
return getLineContent(diffLine.Content[1:])
return getLineContent(diffLine.Content[1:], locale)
}
var (
@ -303,26 +304,26 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) Dif
// try to find equivalent diff line. ignore, otherwise
switch diffLine.Type {
case DiffLineSection:
return getLineContent(diffLine.Content[1:])
return getLineContent(diffLine.Content[1:], locale)
case DiffLineAdd:
compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx)
if compareDiffLine == nil {
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:])
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale)
}
diff1 = compareDiffLine.Content
diff2 = diffLine.Content
case DiffLineDel:
compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx)
if compareDiffLine == nil {
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:])
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale)
}
diff1 = diffLine.Content
diff2 = compareDiffLine.Content
default:
if strings.IndexByte(" +-", diffLine.Content[0]) > -1 {
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:])
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale)
}
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content)
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content, locale)
}
hcd := newHighlightCodeDiff()
@ -330,7 +331,7 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) Dif
// it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back
// if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)"
diffHTML := diffToHTML(nil, diffRecord, diffLine.Type)
return DiffInlineWithUnicodeEscape(template.HTML(diffHTML))
return DiffInlineWithUnicodeEscape(template.HTML(diffHTML), locale)
}
// DiffFile represents a file diff.

View file

@ -55,7 +55,11 @@
<span id="L{{$row.RowNumber}}" data-line-number="{{$row.RowNumber}}"></span>
</td>
{{if $.EscapeStatus.Escaped}}
<td class="lines-escape">{{if $row.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{$.locale.Tr "repo.line_unicode"}}"></a>{{end}}</td>
<td class="lines-escape">
{{if $row.EscapeStatus.Escaped}}
<a href="" class="toggle-escape-button" title="{{template "repo/diff/escape_title" dict "diff" $row "locale" $.locale}}"></a>
{{end}}
</td>
{{end}}
<td rel="L{{$row.RowNumber}}" rel="L{{$row.RowNumber}}" class="lines-code blame-code chroma">
<code class="code-inner pl-3">{{$row.Code}}</code>

View file

@ -19,20 +19,25 @@
</a>
{{end}}
</td>
<td colspan="5" class="lines-code lines-code-old ">{{$inlineDiff := $.section.GetComputedInlineDiffFor $line}}<code {{if $inlineDiff.EscapeStatus.Escaped}}class="code-inner has-escaped" title="{{$.locale.Tr "repo.line_unicode"}}"{{else}}class="code-inner"{{end}}>{{$inlineDiff.Content}}</code></td>
<td colspan="5" class="lines-code lines-code-old ">{{$inlineDiff := $.section.GetComputedInlineDiffFor $line $.locale}}{{/*
*/}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.locale}}</td>
{{else}}
{{$inlineDiff := $.section.GetComputedInlineDiffFor $line}}
{{$inlineDiff := $.section.GetComputedInlineDiffFor $line $.locale}}
<td class="lines-num lines-num-old" data-line-num="{{if $line.LeftIdx}}{{$line.LeftIdx}}{{end}}"><span rel="{{if $line.LeftIdx}}diff-{{$.FileNameHash}}L{{$line.LeftIdx}}{{end}}"></span></td>
<td class="blob-excerpt lines-escape lines-escape-old">{{if and $line.LeftIdx $inlineDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{$.locale.Tr "repo.line_unicode"}}"></a>{{end}}</td>
<td class="blob-excerpt lines-escape lines-escape-old">{{if and $line.LeftIdx $inlineDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{template "repo/diff/escape_title" dict "diff" $inlineDiff "locale" $.locale}}"></a>{{end}}</td>
<td class="blob-excerpt lines-type-marker lines-type-marker-old">{{if $line.LeftIdx}}<span class="mono" data-type-marker=""></span>{{end}}</td>
<td class="blob-excerpt lines-code lines-code-old halfwidth">{{/*
*/}}<code {{if and $line.LeftIdx $inlineDiff.EscapeStatus.Escaped}}class="code-inner has-escaped" title="{{$.locale.Tr "repo.line_unicode"}}"{{else}}class="code-inner"{{end}}>{{if $line.LeftIdx}}{{$inlineDiff.Content}}{{end}}</code>{{/*
*/}}{{if $line.LeftIdx}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.locale}}{{else}}{{/*
*/}}<code class="code-inner"></code>{{/*
*/}}{{end}}{{/*
*/}}</td>
<td class="lines-num lines-num-new" data-line-num="{{if $line.RightIdx}}{{$line.RightIdx}}{{end}}"><span rel="{{if $line.RightIdx}}diff-{{$.FileNameHash}}R{{$line.RightIdx}}{{end}}"></span></td>
<td class="blob-excerpt lines-escape lines-escape-new">{{if and $line.RightIdx $inlineDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{$.locale.Tr "repo.line_unicode"}}"></a>{{end}}</td>
<td class="blob-excerpt lines-escape lines-escape-new">{{if and $line.RightIdx $inlineDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{template "repo/diff/escape_title" dict "diff" $inlineDiff "locale" $.locale}}"></a>{{end}}</td>
<td class="blob-excerpt lines-type-marker lines-type-marker-new">{{if $line.RightIdx}}<span class="mono" data-type-marker=""></span>{{end}}</td>
<td class="blob-excerpt lines-code lines-code-new halfwidth">{{/*
*/}}<code {{if and $line.RightIdx $inlineDiff.EscapeStatus.Escaped}}class="code-inner has-escaped" title="{{$.locale.Tr "repo.line_unicode"}}"{{else}}class="code-inner"{{end}}>{{if $line.RightIdx}}{{$inlineDiff.Content}}{{end}}</code>{{/*
*/}}{{if $line.RightIdx}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.locale}}{{else}}{{/*
*/}}<code class="code-inner"></code>{{/*
*/}}{{end}}{{/*
*/}}</td>
{{end}}
</tr>
@ -62,10 +67,10 @@
<td class="lines-num lines-num-old" data-line-num="{{if $line.LeftIdx}}{{$line.LeftIdx}}{{end}}"><span rel="{{if $line.LeftIdx}}diff-{{$.FileNameHash}}L{{$line.LeftIdx}}{{end}}"></span></td>
<td class="lines-num lines-num-new" data-line-num="{{if $line.RightIdx}}{{$line.RightIdx}}{{end}}"><span rel="{{if $line.RightIdx}}diff-{{$.FileNameHash}}R{{$line.RightIdx}}{{end}}"></span></td>
{{end}}
{{$inlineDiff := $.section.GetComputedInlineDiffFor $line}}
<td class="blob-excerpt lines-escape">{{if $inlineDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{$.locale.Tr "repo.line_unicode"}}"></a>{{end}}</td>
{{$inlineDiff := $.section.GetComputedInlineDiffFor $line $.locale}}
<td class="blob-excerpt lines-escape">{{if $inlineDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{template "repo/diff/escape_title" dict "diff" $inlineDiff "locale" $.locale}}"></a>{{end}}</td>
<td class="blob-excerpt lines-type-marker"><span class="mono" data-type-marker="{{$line.GetLineTypeMarker}}"></span></td>
<td class="blob-excerpt lines-code{{if (not $line.RightIdx)}} lines-code-old{{end}}"><code {{if $inlineDiff.EscapeStatus.Escaped}}class="code-inner has-escaped" title="{{$.locale.Tr "repo.line_unicode"}}"{{else}}class="code-inner"{{end}}>{{$inlineDiff.Content}}</code></td>
<td class="blob-excerpt lines-code{{if (not $line.RightIdx)}} lines-code-old{{end}}"><code {{if $inlineDiff.EscapeStatus.Escaped}}class="code-inner has-escaped" title="{{template "repo/diff/escape_title" dict "diff" $inlineDiff "locale" $.locale}}"{{else}}class="code-inner"{{end}}>{{$inlineDiff.Content}}</code></td>
</tr>
{{end}}
{{end}}

View file

@ -0,0 +1,2 @@
{{if .diff.EscapeStatus.HasInvisible}}{{.locale.Tr "repo.invisible_runes_line"}} {{end}}{{/*
*/}}{{if .diff.EscapeStatus.HasAmbiguous}}{{.locale.Tr "repo.ambiguous_runes_line"}}{{end}}

View file

@ -0,0 +1,6 @@
<code {{if .diff.EscapeStatus.Escaped}}{{/*
*/}}class="code-inner has-escaped" {{/*
*/}}title="{{template "repo/diff/escape_title" .}}"{{/*
*/}}{{else}}{{/*
*/}}class="code-inner"{{/*
*/}}{{end}}>{{.diff.Content}}</code>

View file

@ -21,15 +21,17 @@
{{svg "octicon-fold"}}
</a>
{{end}}
</td>{{$inlineDiff := $section.GetComputedInlineDiffFor $line}}
<td class="lines-escape lines-escape-old">{{if $inlineDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{$.locale.Tr "repo.line_unicode"}}"></a>{{end}}</td>
<td colspan="6" class="lines-code lines-code-old "><code {{if $inlineDiff.EscapeStatus.Escaped}}class="code-inner has-escaped" title="{{$.root.locale.Tr "repo.line_unicode"}}"{{else}}class="code-inner"{{end}}>{{$inlineDiff.Content}}</span></td>
</td>{{$inlineDiff := $section.GetComputedInlineDiffFor $line $.root.locale}}
<td class="lines-escape lines-escape-old">{{if $inlineDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{template "repo/diff/escape_title" dict "diff" $inlineDiff "locale" $.locale}}"></a>{{end}}</td>
<td colspan="6" class="lines-code lines-code-old ">{{/*
*/}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.root.locale}}{{/*
*/}}</td>
{{else if and (eq .GetType 3) $hasmatch}}{{/* DEL */}}
{{$match := index $section.Lines $line.Match}}
{{- $leftDiff := ""}}{{if $line.LeftIdx}}{{$leftDiff = $section.GetComputedInlineDiffFor $line}}{{end}}
{{- $rightDiff := ""}}{{if $match.RightIdx}}{{$rightDiff = $section.GetComputedInlineDiffFor $match}}{{end}}
{{- $leftDiff := ""}}{{if $line.LeftIdx}}{{$leftDiff = $section.GetComputedInlineDiffFor $line $.root.locale}}{{end}}
{{- $rightDiff := ""}}{{if $match.RightIdx}}{{$rightDiff = $section.GetComputedInlineDiffFor $match $.root.locale}}{{end}}
<td class="lines-num lines-num-old del-code" data-line-num="{{$line.LeftIdx}}"><span rel="diff-{{$file.NameHash}}L{{$line.LeftIdx}}"></span></td>
<td class="lines-escape del-code lines-escape-old">{{if $line.LeftIdx}}{{if $leftDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{$.locale.Tr "repo.line_unicode"}}"></a>{{end}}{{end}}</td>
<td class="lines-escape del-code lines-escape-old">{{if $line.LeftIdx}}{{if $leftDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{template "repo/diff/escape_title" dict "diff" $leftDiff "locale" $.locale}}"></a>{{end}}{{end}}</td>
<td class="lines-type-marker lines-type-marker-old del-code"><span class="mono" data-type-marker="{{$line.GetLineTypeMarker}}"></span></td>
<td class="lines-code lines-code-old halfwidth del-code">{{/*
*/}}{{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{/*
@ -38,13 +40,13 @@
*/}}</a>{{/*
*/}}{{end}}{{/*
*/}}{{if $line.LeftIdx}}{{/*
*/}}<code {{if $leftDiff.EscapeStatus.Escaped}}class="code-inner has-escaped" title="{{$.root.locale.Tr "repo.line_unicode"}}"{{else}}class="code-inner"{{end}}>{{$leftDiff.Content}}</code>{{/*
*/}}{{template "repo/diff/section_code" dict "diff" $leftDiff "locale" $.root.locale}}{{/*
*/}}{{else}}{{/*
*/}}<code class="code-inner"></code>{{/*
*/}}{{end}}{{/*
*/}}</td>
<td class="lines-num lines-num-new add-code" data-line-num="{{if $match.RightIdx}}{{$match.RightIdx}}{{end}}"><span rel="{{if $match.RightIdx}}diff-{{$file.NameHash}}R{{$match.RightIdx}}{{end}}"></span></td>
<td class="lines-escape add-code lines-escape-new">{{if $match.RightIdx}}{{if $rightDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{$.locale.Tr "repo.line_unicode"}}"></a>{{end}}{{end}}</td>
<td class="lines-escape add-code lines-escape-new">{{if $match.RightIdx}}{{if $rightDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{template "repo/diff/escape_title" dict "diff" $rightDiff "locale" $.locale}}"></a>{{end}}{{end}}</td>
<td class="lines-type-marker lines-type-marker-new add-code">{{if $match.RightIdx}}<span class="mono" data-type-marker="{{$match.GetLineTypeMarker}}"></span>{{end}}</td>
<td class="lines-code lines-code-new halfwidth add-code">{{/*
*/}}{{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{/*
@ -53,15 +55,15 @@
*/}}</a>{{/*
*/}}{{end}}{{/*
*/}}{{if $match.RightIdx}}{{/*
*/}}<code {{if $rightDiff.EscapeStatus.Escaped}}class="code-inner has-escaped" title="{{$.root.locale.Tr "repo.line_unicode"}}"{{else}}class="code-inner"{{end}}>{{$rightDiff.Content}}</code>{{/*
*/}}{{template "repo/diff/section_code" dict "diff" $rightDiff "locale" $.root.locale}}{{/*
*/}}{{else}}{{/*
*/}}<code class="code-inner"></code>{{/*
*/}}{{end}}{{/*
*/}}</td>
{{else}}
{{$inlineDiff := $section.GetComputedInlineDiffFor $line}}
{{$inlineDiff := $section.GetComputedInlineDiffFor $line $.root.locale}}
<td class="lines-num lines-num-old" data-line-num="{{if $line.LeftIdx}}{{$line.LeftIdx}}{{end}}"><span rel="{{if $line.LeftIdx}}diff-{{$file.NameHash}}L{{$line.LeftIdx}}{{end}}"></span></td>
<td class="lines-escape lines-escape-old">{{if $line.LeftIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{$.locale.Tr "repo.line_unicode"}}"></a>{{end}}{{end}}</td>
<td class="lines-escape lines-escape-old">{{if $line.LeftIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{template "repo/diff/escape_title" dict "diff" $inlineDiff "locale" $.locale}}"></a>{{end}}{{end}}</td>
<td class="lines-type-marker lines-type-marker-old">{{if $line.LeftIdx}}<span class="mono" data-type-marker="{{$line.GetLineTypeMarker}}"></span>{{end}}</td>
<td class="lines-code lines-code-old halfwidth">{{/*
*/}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 2))}}{{/*
@ -70,13 +72,13 @@
*/}}</a>{{/*
*/}}{{end}}{{/*
*/}}{{if $line.LeftIdx}}{{/*
*/}}<code {{if $inlineDiff.EscapeStatus.Escaped}}class="code-inner has-escaped" title="{{$.root.locale.Tr "repo.line_unicode"}}"{{else}}class="code-inner"{{end}}>{{$inlineDiff.Content}}</code>{{/*
*/}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.root.locale}}{{/*
*/}}{{else}}{{/*
*/}}<code class="code-inner"></code>{{/*
*/}}{{end}}{{/*
*/}}</td>
<td class="lines-num lines-num-new" data-line-num="{{if $line.RightIdx}}{{$line.RightIdx}}{{end}}"><span rel="{{if $line.RightIdx}}diff-{{$file.NameHash}}R{{$line.RightIdx}}{{end}}"></span></td>
<td class="lines-escape lines-escape-new">{{if $line.RightIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{$.locale.Tr "repo.line_unicode"}}"></a>{{end}}{{end}}</td>
<td class="lines-escape lines-escape-new">{{if $line.RightIdx}}{{if $inlineDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{template "repo/diff/escape_title" dict "diff" $inlineDiff "locale" $.locale}}"></a>{{end}}{{end}}</td>
<td class="lines-type-marker lines-type-marker-new">{{if $line.RightIdx}}<span class="mono" data-type-marker="{{$line.GetLineTypeMarker}}"></span>{{end}}</td>
<td class="lines-code lines-code-new halfwidth">{{/*
*/}}{{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 3))}}{{/*
@ -85,7 +87,7 @@
*/}}</a>{{/*
*/}}{{end}}{{/*
*/}}{{if $line.RightIdx}}{{/*
*/}}<code {{if $inlineDiff.EscapeStatus.Escaped}}class="code-inner has-escaped" title="{{$.root.locale.Tr "repo.line_unicode"}}"{{else}}class="code-inner"{{end}}>{{$inlineDiff.Content}}</code>{{/*
*/}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.root.locale}}{{/*
*/}}{{else}}{{/*
*/}}<code class="code-inner"></code>{{/*
*/}}{{end}}{{/*

View file

@ -25,12 +25,12 @@
<td class="lines-num lines-num-old" data-line-num="{{if $line.LeftIdx}}{{$line.LeftIdx}}{{end}}"><span rel="{{if $line.LeftIdx}}diff-{{$file.NameHash}}L{{$line.LeftIdx}}{{end}}"></span></td>
<td class="lines-num lines-num-new" data-line-num="{{if $line.RightIdx}}{{$line.RightIdx}}{{end}}"><span rel="{{if $line.RightIdx}}diff-{{$file.NameHash}}R{{$line.RightIdx}}{{end}}"></span></td>
{{end}}
{{$inlineDiff := $section.GetComputedInlineDiffFor $line -}}
<td class="lines-escape">{{if $inlineDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{$.locale.Tr "repo.line_unicode"}}"></a>{{end}}</td>
{{$inlineDiff := $section.GetComputedInlineDiffFor $line $.root.locale -}}
<td class="lines-escape">{{if $inlineDiff.EscapeStatus.Escaped}}<a href="" class="toggle-escape-button" title="{{template "repo/diff/escape_title" dict "diff" $inlineDiff "locale" $.locale}}"></a>{{end}}</td>
<td class="lines-type-marker"><span class="mono" data-type-marker="{{$line.GetLineTypeMarker}}"></span></td>
{{if eq .GetType 4}}
<td class="chroma lines-code blob-hunk">{{/*
*/}}<code {{if $inlineDiff.EscapeStatus.Escaped}}class="code-inner has-escaped" title="{{$.root.locale.Tr "repo.line_unicode"}}"{{else}}class="code-inner"{{end}}>{{$inlineDiff.Content}}</code>{{/*
*/}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.root.locale}}{{/*
*/}}</td>
{{else}}
<td class="chroma lines-code{{if (not $line.RightIdx)}} lines-code-old{{end}}">{{/*
@ -39,7 +39,7 @@
*/}}{{svg "octicon-plus"}}{{/*
*/}}</a>{{/*
*/}}{{end}}{{/*
*/}}<code {{if $inlineDiff.EscapeStatus.Escaped}}class="code-inner has-escaped" title="{{$.root.locale.Tr "repo.line_unicode"}}"{{else}}class="code-inner"{{end}}>{{$inlineDiff.Content}}</code>{{/*
*/}}{{template "repo/diff/section_code" dict "diff" $inlineDiff "locale" $.root.locale}}{{/*
*/}}</td>
{{end}}
</tr>

View file

@ -1,19 +1,22 @@
{{if .EscapeStatus}}
{{if .EscapeStatus.BadBIDI}}
{{if .EscapeStatus.HasInvisible}}
<div class="ui error message unicode-escape-prompt tl">
<span class="close icon hide-panel button" data-panel-closest=".message">{{svg "octicon-x" 16 "close inside"}}</span>
<div class="header">
{{$.root.locale.Tr "repo.bidi_bad_header"}}
{{$.root.locale.Tr "repo.invisible_runes_header"}}
</div>
<p>{{$.root.locale.Tr "repo.bidi_bad_description" | Str2html}}</p>
<p>{{$.root.locale.Tr "repo.invisible_runes_description" | Str2html}}</p>
{{if .EscapeStatus.HasAmbiguous}}
<p>{{$.root.locale.Tr "repo.ambiguous_runes_description" | Str2html}}</p>
{{end}}
</div>
{{else if .EscapeStatus.HasBIDI}}
{{else if .EscapeStatus.HasAmbiguous}}
<div class="ui warning message unicode-escape-prompt tl">
<span class="close icon hide-panel button" data-panel-closest=".message">{{svg "octicon-x" 16 "close inside"}}</span>
<div class="header">
{{$.root.locale.Tr "repo.unicode_header"}}
{{$.root.locale.Tr "repo.ambiguous_runes_header"}}
</div>
<p>{{$.root.locale.Tr "repo.unicode_description" | Str2html}}</p>
<p>{{$.root.locale.Tr "repo.ambiguous_runes_description" | Str2html}}</p>
</div>
{{end}}
{{end}}

View file

@ -113,7 +113,7 @@
<tr>
<td id="L{{$line}}" class="lines-num"><span id="L{{$line}}" data-line-number="{{$line}}"></span></td>
{{if $.EscapeStatus.Escaped}}
<td class="lines-escape">{{if (index $.LineEscapeStatus $idx).Escaped}}<a href="" class="toggle-escape-button" title="{{$.locale.Tr "repo.line_unicode"}}"></a>{{end}}</td>
<td class="lines-escape">{{if (index $.LineEscapeStatus $idx).Escaped}}<a href="" class="toggle-escape-button" title="{{if (index $.LineEscapeStatus $idx).HasInvisible}}{{$.locale.Tr "repo.invisible_runes_line"}} {{end}}{{if (index $.LineEscapeStatus $idx).HasAmbiguous}}{{$.locale.Tr "repo.ambiguous_runes_line"}}{{end}}"></a>{{end}}</td>
{{end}}
<td rel="L{{$line}}" class="lines-code chroma"><code class="code-inner">{{$code | Safe}}</code></td>
</tr>

View file

@ -82,7 +82,11 @@
.broken-code-point {
font-family: var(--fonts-monospace);
color: blue;
color: var(--color-blue);
}
.unicode-escaped .ambiguous-code-point {
border: 1px var(--color-yellow) solid;
}
.metas {