Add option to disable ambiguous unicode characters detection (#28454)
* Close #24483 * Close #28123 * Close #23682 * Close #23149 (maybe more)
This commit is contained in:
parent
408a484224
commit
20929edc99
17 changed files with 113 additions and 149 deletions
|
@ -8,11 +8,12 @@
|
|||
package charset
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"html/template"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/translation"
|
||||
)
|
||||
|
||||
|
@ -20,20 +21,18 @@ import (
|
|||
const RuneNBSP = 0xa0
|
||||
|
||||
// EscapeControlHTML escapes the unicode control sequences in a provided html document
|
||||
func EscapeControlHTML(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) {
|
||||
func EscapeControlHTML(html template.HTML, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output template.HTML) {
|
||||
sb := &strings.Builder{}
|
||||
outputStream := &HTMLStreamerWriter{Writer: sb}
|
||||
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
|
||||
|
||||
if err := StreamHTML(strings.NewReader(text), streamer); err != nil {
|
||||
streamer.escaped.HasError = true
|
||||
log.Error("Error whilst escaping: %v", err)
|
||||
}
|
||||
return streamer.escaped, sb.String()
|
||||
escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, allowed...) // err has been handled in EscapeControlReader
|
||||
return escaped, template.HTML(sb.String())
|
||||
}
|
||||
|
||||
// EscapeControlReaders escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte
|
||||
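// EscapeControlReader escapes the unicode control sequences in HTML content read from reader, writes the escaped output to writer for the given locale, and returns the findings as an EscapeStatus. When setting.UI.AmbiguousUnicodeDetection is disabled, the content is copied to writer unchanged and an empty EscapeStatus is returned.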
|
||||
func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) {
|
||||
if !setting.UI.AmbiguousUnicodeDetection {
|
||||
_, err = io.Copy(writer, reader)
|
||||
return &EscapeStatus{}, err
|
||||
}
|
||||
outputStream := &HTMLStreamerWriter{Writer: writer}
|
||||
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
|
||||
|
||||
|
@ -43,41 +42,3 @@ func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.
|
|||
}
|
||||
return streamer.escaped, err
|
||||
}
|
||||
|
||||
// EscapeControlStringReader escapes the unicode control sequences in plain string content read from reader, writes the escaped output to writer for the given locale, and returns the findings as an EscapeStatus. HTML line breaks are not inserted after every newline by this method.
|
||||
func EscapeControlStringReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) {
|
||||
bufRd := bufio.NewReader(reader)
|
||||
outputStream := &HTMLStreamerWriter{Writer: writer}
|
||||
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
|
||||
|
||||
for {
|
||||
line, rdErr := bufRd.ReadString('\n')
|
||||
if len(line) > 0 {
|
||||
if err := streamer.Text(line); err != nil {
|
||||
streamer.escaped.HasError = true
|
||||
log.Error("Error whilst escaping: %v", err)
|
||||
return streamer.escaped, err
|
||||
}
|
||||
}
|
||||
if rdErr != nil {
|
||||
if rdErr != io.EOF {
|
||||
err = rdErr
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
return streamer.escaped, err
|
||||
}
|
||||
|
||||
// EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string
|
||||
func EscapeControlString(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) {
|
||||
sb := &strings.Builder{}
|
||||
outputStream := &HTMLStreamerWriter{Writer: sb}
|
||||
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
|
||||
|
||||
if err := streamer.Text(text); err != nil {
|
||||
streamer.escaped.HasError = true
|
||||
log.Error("Error whilst escaping: %v", err)
|
||||
}
|
||||
return streamer.escaped, sb.String()
|
||||
}
|
||||
|
|
|
@ -64,7 +64,7 @@ func (e *escapeStreamer) Text(data string) error {
|
|||
until, next = nextIdxs[0]+pos, nextIdxs[1]+pos
|
||||
}
|
||||
|
||||
// from pos until until we know that the runes are not \r\t\n or even ' '
|
||||
// from pos until we know that the runes are not \r\t\n or even ' '
|
||||
runes := make([]rune, 0, next-until)
|
||||
positions := make([]int, 0, next-until+1)
|
||||
|
||||
|
|
|
@ -4,11 +4,14 @@
|
|||
package charset
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/test"
|
||||
"code.gitea.io/gitea/modules/translation"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
type escapeControlTest struct {
|
||||
|
@ -132,22 +135,8 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`,
|
|||
},
|
||||
}
|
||||
|
||||
func TestEscapeControlString(t *testing.T) {
|
||||
for _, tt := range escapeControlTests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
status, result := EscapeControlString(tt.text, &translation.MockLocale{})
|
||||
if !reflect.DeepEqual(*status, tt.status) {
|
||||
t.Errorf("EscapeControlString() status = %v, wanted= %v", status, tt.status)
|
||||
}
|
||||
if result != tt.result {
|
||||
t.Errorf("EscapeControlString()\nresult= %v,\nwanted= %v", result, tt.result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestEscapeControlReader(t *testing.T) {
|
||||
// lets add some control characters to the tests
|
||||
// add some control characters to the tests
|
||||
tests := make([]escapeControlTest, 0, len(escapeControlTests)*3)
|
||||
copy(tests, escapeControlTests)
|
||||
|
||||
|
@ -169,29 +158,20 @@ func TestEscapeControlReader(t *testing.T) {
|
|||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
input := strings.NewReader(tt.text)
|
||||
output := &strings.Builder{}
|
||||
status, err := EscapeControlReader(input, output, &translation.MockLocale{})
|
||||
result := output.String()
|
||||
if err != nil {
|
||||
t.Errorf("EscapeControlReader(): err = %v", err)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(*status, tt.status) {
|
||||
t.Errorf("EscapeControlReader() status = %v, wanted= %v", status, tt.status)
|
||||
}
|
||||
if result != tt.result {
|
||||
t.Errorf("EscapeControlReader()\nresult= %v,\nwanted= %v", result, tt.result)
|
||||
}
|
||||
status, err := EscapeControlReader(strings.NewReader(tt.text), output, &translation.MockLocale{})
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, tt.status, *status)
|
||||
assert.Equal(t, tt.result, output.String())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestEscapeControlReader_panic(t *testing.T) {
|
||||
bs := make([]byte, 0, 20479)
|
||||
bs = append(bs, 'A')
|
||||
for i := 0; i < 6826; i++ {
|
||||
bs = append(bs, []byte("—")...)
|
||||
}
|
||||
_, _ = EscapeControlString(string(bs), &translation.MockLocale{})
|
||||
func TestSettingAmbiguousUnicodeDetection(t *testing.T) {
|
||||
defer test.MockVariableValue(&setting.UI.AmbiguousUnicodeDetection, true)()
|
||||
_, out := EscapeControlHTML("a test", &translation.MockLocale{})
|
||||
assert.EqualValues(t, `a<span class="escaped-code-point" data-escaped="[U+00A0]"><span class="char"> </span></span>test`, out)
|
||||
setting.UI.AmbiguousUnicodeDetection = false
|
||||
_, out = EscapeControlHTML("a test", &translation.MockLocale{})
|
||||
assert.EqualValues(t, `a test`, out)
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue