Refactor markup render system (#32533)

Remove unmaintainable sanitizer rules. No need to add special "class"
regexp rules anymore, use RenderInternal.SafeAttr instead, more details
(and examples) are in the tests
This commit is contained in:
wxiaoguang 2024-11-18 13:25:42 +08:00 committed by GitHub
parent 4f879a00df
commit 8a20fba8eb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
42 changed files with 568 additions and 508 deletions

View file

@ -9,15 +9,27 @@ package common
import (
"bytes"
"regexp"
"sync"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
"mvdan.cc/xurls/v2"
)
var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
type GlobalVarsType struct {
wwwURLRegxp *regexp.Regexp
LinkRegex *regexp.Regexp // fast matching a URL link, no any extra validation.
}
var GlobalVars = sync.OnceValue[*GlobalVarsType](func() *GlobalVarsType {
v := &GlobalVarsType{}
v.wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
v.LinkRegex, _ = xurls.StrictMatchingScheme("https?://")
return v
})
type linkifyParser struct{}
@ -60,10 +72,10 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont
var protocol []byte
typ := ast.AutoLinkURL
if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) {
m = LinkRegex.FindSubmatchIndex(line)
m = GlobalVars().LinkRegex.FindSubmatchIndex(line)
}
if m == nil && bytes.HasPrefix(line, domainWWW) {
m = wwwURLRegxp.FindSubmatchIndex(line)
m = GlobalVars().wwwURLRegxp.FindSubmatchIndex(line)
protocol = []byte("http")
}
if m != nil {