Refactor renders (#15175)
* Refactor renders * Some performance optimization * Fix comment * Transform reader * Fix csv test * Fix test * Fix tests * Improve optimaziation * Fix test * Fix test * Detect file encoding with reader * Improve optimaziation * reduce memory usage * improve code * fix build * Fix test * Fix for go1.15 * Fix render * Fix comment * Fix lint * Fix test * Don't use NormalEOF when unnecessary * revert change on util.go * Apply suggestions from code review Co-authored-by: zeripath <art27@cantab.net> * rename function * Take NormalEOF back Co-authored-by: zeripath <art27@cantab.net>
This commit is contained in:
parent
c9cc6698d2
commit
9d99f6ab19
41 changed files with 1027 additions and 627 deletions
|
@ -7,6 +7,8 @@ package charset
|
|||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
|
@ -21,6 +23,33 @@ import (
|
|||
// UTF8BOM is the utf-8 byte-order marker
|
||||
var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'}
|
||||
|
||||
// ToUTF8WithFallbackReader detects the encoding of content and coverts to UTF-8 reader if possible
|
||||
func ToUTF8WithFallbackReader(rd io.Reader) io.Reader {
|
||||
var buf = make([]byte, 2048)
|
||||
n, err := rd.Read(buf)
|
||||
if err != nil {
|
||||
return rd
|
||||
}
|
||||
|
||||
charsetLabel, err := DetectEncoding(buf[:n])
|
||||
if err != nil || charsetLabel == "UTF-8" {
|
||||
return io.MultiReader(bytes.NewReader(RemoveBOMIfPresent(buf[:n])), rd)
|
||||
}
|
||||
|
||||
encoding, _ := charset.Lookup(charsetLabel)
|
||||
if encoding == nil {
|
||||
return io.MultiReader(bytes.NewReader(buf[:n]), rd)
|
||||
}
|
||||
|
||||
return transform.NewReader(
|
||||
io.MultiReader(
|
||||
bytes.NewReader(RemoveBOMIfPresent(buf[:n])),
|
||||
rd,
|
||||
),
|
||||
encoding.NewDecoder(),
|
||||
)
|
||||
}
|
||||
|
||||
// ToUTF8WithErr converts content to UTF8 encoding
|
||||
func ToUTF8WithErr(content []byte) (string, error) {
|
||||
charsetLabel, err := DetectEncoding(content)
|
||||
|
@ -49,24 +78,8 @@ func ToUTF8WithErr(content []byte) (string, error) {
|
|||
|
||||
// ToUTF8WithFallback detects the encoding of content and coverts to UTF-8 if possible
|
||||
func ToUTF8WithFallback(content []byte) []byte {
|
||||
charsetLabel, err := DetectEncoding(content)
|
||||
if err != nil || charsetLabel == "UTF-8" {
|
||||
return RemoveBOMIfPresent(content)
|
||||
}
|
||||
|
||||
encoding, _ := charset.Lookup(charsetLabel)
|
||||
if encoding == nil {
|
||||
return content
|
||||
}
|
||||
|
||||
// If there is an error, we concatenate the nicely decoded part and the
|
||||
// original left over. This way we won't lose data.
|
||||
result, n, err := transform.Bytes(encoding.NewDecoder(), content)
|
||||
if err != nil {
|
||||
return append(result, content[n:]...)
|
||||
}
|
||||
|
||||
return RemoveBOMIfPresent(result)
|
||||
bs, _ := ioutil.ReadAll(ToUTF8WithFallbackReader(bytes.NewReader(content)))
|
||||
return bs
|
||||
}
|
||||
|
||||
// ToUTF8 converts content to UTF8 encoding and ignore error
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue