Refactor renders (#15175)

* Refactor renders

* Some performance optimization

* Fix comment

* Transform reader

* Fix csv test

* Fix test

* Fix tests

* Improve optimaziation

* Fix test

* Fix test

* Detect file encoding with reader

* Improve optimaziation

* reduce memory usage

* improve code

* fix build

* Fix test

* Fix for go1.15

* Fix render

* Fix comment

* Fix lint

* Fix test

* Don't use NormalEOF when unnecessary

* revert change on util.go

* Apply suggestions from code review

Co-authored-by: zeripath <art27@cantab.net>

* rename function

* Take NormalEOF back

Co-authored-by: zeripath <art27@cantab.net>
This commit is contained in:
Lunny Xiao 2021-04-20 06:25:08 +08:00 committed by GitHub
parent c9cc6698d2
commit 9d99f6ab19
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
41 changed files with 1027 additions and 627 deletions

View file

@ -7,6 +7,8 @@ package charset
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"strings"
"unicode/utf8"
@ -21,6 +23,33 @@ import (
// UTF8BOM is the utf-8 byte-order marker
var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'}
// ToUTF8WithFallbackReader detects the encoding of content and coverts to UTF-8 reader if possible
func ToUTF8WithFallbackReader(rd io.Reader) io.Reader {
var buf = make([]byte, 2048)
n, err := rd.Read(buf)
if err != nil {
return rd
}
charsetLabel, err := DetectEncoding(buf[:n])
if err != nil || charsetLabel == "UTF-8" {
return io.MultiReader(bytes.NewReader(RemoveBOMIfPresent(buf[:n])), rd)
}
encoding, _ := charset.Lookup(charsetLabel)
if encoding == nil {
return io.MultiReader(bytes.NewReader(buf[:n]), rd)
}
return transform.NewReader(
io.MultiReader(
bytes.NewReader(RemoveBOMIfPresent(buf[:n])),
rd,
),
encoding.NewDecoder(),
)
}
// ToUTF8WithErr converts content to UTF8 encoding
func ToUTF8WithErr(content []byte) (string, error) {
charsetLabel, err := DetectEncoding(content)
@ -49,24 +78,8 @@ func ToUTF8WithErr(content []byte) (string, error) {
// ToUTF8WithFallback detects the encoding of content and coverts to UTF-8 if possible
func ToUTF8WithFallback(content []byte) []byte {
charsetLabel, err := DetectEncoding(content)
if err != nil || charsetLabel == "UTF-8" {
return RemoveBOMIfPresent(content)
}
encoding, _ := charset.Lookup(charsetLabel)
if encoding == nil {
return content
}
// If there is an error, we concatenate the nicely decoded part and the
// original left over. This way we won't lose data.
result, n, err := transform.Bytes(encoding.NewDecoder(), content)
if err != nil {
return append(result, content[n:]...)
}
return RemoveBOMIfPresent(result)
bs, _ := ioutil.ReadAll(ToUTF8WithFallbackReader(bytes.NewReader(content)))
return bs
}
// ToUTF8 converts content to UTF8 encoding and ignore error