Refactor markup render system (#32533)

Remove unmaintainable sanitizer rules. No need to add special "class"
regexp rules anymore, use RenderInternal.SafeAttr instead, more details
(and examples) are in the tests
This commit is contained in:
wxiaoguang 2024-11-18 13:25:42 +08:00 committed by GitHub
parent 4f879a00df
commit 8a20fba8eb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
42 changed files with 568 additions and 508 deletions

View file

@ -34,13 +34,6 @@ func NewDetails() *Details {
}
}
// IsDetails returns true if the given node implements the Details interface,
// otherwise false.
func IsDetails(node ast.Node) bool {
_, ok := node.(*Details)
return ok
}
// Summary is a block that contains the summary of details block
type Summary struct {
ast.BaseBlock
@ -66,13 +59,6 @@ func NewSummary() *Summary {
}
}
// IsSummary returns true if the given node implements the Summary interface,
// otherwise false.
func IsSummary(node ast.Node) bool {
_, ok := node.(*Summary)
return ok
}
// TaskCheckBoxListItem is a block that represents a list item of a markdown block with a checkbox
type TaskCheckBoxListItem struct {
*ast.ListItem
@ -103,14 +89,7 @@ func NewTaskCheckBoxListItem(listItem *ast.ListItem) *TaskCheckBoxListItem {
}
}
// IsTaskCheckBoxListItem returns true if the given node implements the TaskCheckBoxListItem interface,
// otherwise false.
func IsTaskCheckBoxListItem(node ast.Node) bool {
_, ok := node.(*TaskCheckBoxListItem)
return ok
}
// Icon is an inline for a fomantic icon
// Icon is an inline for a Fomantic UI icon
type Icon struct {
ast.BaseInline
Name []byte
@ -139,13 +118,6 @@ func NewIcon(name string) *Icon {
}
}
// IsIcon returns true if the given node implements the Icon interface,
// otherwise false.
func IsIcon(node ast.Node) bool {
_, ok := node.(*Icon)
return ok
}
// ColorPreview is an inline for a color preview
type ColorPreview struct {
ast.BaseInline

View file

@ -7,9 +7,11 @@ import (
"fmt"
"regexp"
"strings"
"sync"
"code.gitea.io/gitea/modules/container"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/markup/internal"
"code.gitea.io/gitea/modules/setting"
"github.com/yuin/goldmark/ast"
@ -23,11 +25,13 @@ import (
// ASTTransformer is a default transformer of the goldmark tree.
type ASTTransformer struct {
renderInternal *internal.RenderInternal
attentionTypes container.Set[string]
}
func NewASTTransformer() *ASTTransformer {
func NewASTTransformer(renderInternal *internal.RenderInternal) *ASTTransformer {
return &ASTTransformer{
renderInternal: renderInternal,
attentionTypes: container.SetOf("note", "tip", "important", "warning", "caution"),
}
}
@ -109,12 +113,16 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
}
}
// NewHTMLRenderer creates a HTMLRenderer to render
// in the gitea form.
func NewHTMLRenderer(opts ...html.Option) renderer.NodeRenderer {
// it is copied from old code, which is quite doubtful whether it is correct
var reValidIconName = sync.OnceValue[*regexp.Regexp](func() *regexp.Regexp {
return regexp.MustCompile(`^[-\w]+$`) // old: regexp.MustCompile("^[a-z ]+$")
})
// NewHTMLRenderer creates a HTMLRenderer to render in the gitea form.
func NewHTMLRenderer(renderInternal *internal.RenderInternal, opts ...html.Option) renderer.NodeRenderer {
r := &HTMLRenderer{
Config: html.NewConfig(),
reValidName: regexp.MustCompile("^[a-z ]+$"),
renderInternal: renderInternal,
Config: html.NewConfig(),
}
for _, opt := range opts {
opt.SetHTMLOption(&r.Config)
@ -126,7 +134,7 @@ func NewHTMLRenderer(opts ...html.Option) renderer.NodeRenderer {
// renders gitea specific features.
type HTMLRenderer struct {
html.Config
reValidName *regexp.Regexp
renderInternal *internal.RenderInternal
}
// RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs.
@ -214,12 +222,13 @@ func (r *HTMLRenderer) renderIcon(w util.BufWriter, source []byte, node ast.Node
return ast.WalkContinue, nil
}
if !r.reValidName.MatchString(name) {
if !reValidIconName().MatchString(name) {
// skip this
return ast.WalkContinue, nil
}
_, err := w.WriteString(fmt.Sprintf(`<i class="icon %s"></i>`, name))
// FIXME: the "icon xxx" is from Fomantic UI, it's really questionable whether it still works correctly
err := r.renderInternal.FormatWithSafeAttrs(w, `<i class="icon %s"></i>`, name)
if err != nil {
return ast.WalkStop, err
}

View file

@ -9,7 +9,6 @@ import (
"html/template"
"io"
"strings"
"sync"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markup"
@ -29,11 +28,6 @@ import (
"github.com/yuin/goldmark/util"
)
var (
specMarkdown goldmark.Markdown
specMarkdownOnce sync.Once
)
var (
renderContextKey = parser.NewContextKey()
renderConfigKey = parser.NewContextKey()
@ -68,85 +62,95 @@ func newParserContext(ctx *markup.RenderContext) parser.Context {
return pc
}
// SpecializedMarkdown sets up the Gitea specific markdown extensions
func SpecializedMarkdown() goldmark.Markdown {
specMarkdownOnce.Do(func() {
specMarkdown = goldmark.New(
goldmark.WithExtensions(
extension.NewTable(
extension.WithTableCellAlignMethod(extension.TableCellAlignAttribute)),
extension.Strikethrough,
extension.TaskList,
extension.DefinitionList,
common.FootnoteExtension,
highlighting.NewHighlighting(
highlighting.WithFormatOptions(
chromahtml.WithClasses(true),
chromahtml.PreventSurroundingPre(true),
),
highlighting.WithWrapperRenderer(func(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) {
if entering {
language, _ := c.Language()
if language == nil {
language = []byte("text")
}
type GlodmarkRender struct {
ctx *markup.RenderContext
languageStr := string(language)
preClasses := []string{"code-block"}
if languageStr == "mermaid" || languageStr == "math" {
preClasses = append(preClasses, "is-loading")
}
_, err := w.WriteString(`<pre class="` + strings.Join(preClasses, " ") + `">`)
if err != nil {
return
}
// include language-x class as part of commonmark spec
// the "display" class is used by "js/markup/math.js" to render the code element as a block
_, err = w.WriteString(`<code class="chroma language-` + string(language) + ` display">`)
if err != nil {
return
}
} else {
_, err := w.WriteString("</code></pre>")
if err != nil {
return
}
}
}),
),
math.NewExtension(
math.Enabled(setting.Markdown.EnableMath),
),
meta.Meta,
),
goldmark.WithParserOptions(
parser.WithAttribute(),
parser.WithAutoHeadingID(),
parser.WithASTTransformers(
util.Prioritized(NewASTTransformer(), 10000),
),
),
goldmark.WithRendererOptions(
html.WithUnsafe(),
),
)
// Override the original Tasklist renderer!
specMarkdown.Renderer().AddOptions(
renderer.WithNodeRenderers(
util.Prioritized(NewHTMLRenderer(), 10),
),
)
})
return specMarkdown
goldmarkMarkdown goldmark.Markdown
}
// actualRender renders Markdown to HTML without handling special links.
func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
converter := SpecializedMarkdown()
func (r *GlodmarkRender) Convert(source []byte, writer io.Writer, opts ...parser.ParseOption) error {
return r.goldmarkMarkdown.Convert(source, writer, opts...)
}
func (r *GlodmarkRender) Renderer() renderer.Renderer {
return r.goldmarkMarkdown.Renderer()
}
func (r *GlodmarkRender) highlightingRenderer(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) {
if entering {
language, _ := c.Language()
if language == nil {
language = []byte("text")
}
languageStr := string(language)
preClasses := []string{"code-block"}
if languageStr == "mermaid" || languageStr == "math" {
preClasses = append(preClasses, "is-loading")
}
err := r.ctx.RenderInternal.FormatWithSafeAttrs(w, `<pre class="%s">`, strings.Join(preClasses, " "))
if err != nil {
return
}
// include language-x class as part of commonmark spec
// the "display" class is used by "js/markup/math.js" to render the code element as a block
err = r.ctx.RenderInternal.FormatWithSafeAttrs(w, `<code class="chroma language-%s display">`, string(language))
if err != nil {
return
}
} else {
_, err := w.WriteString("</code></pre>")
if err != nil {
return
}
}
}
// SpecializedMarkdown sets up the Gitea specific markdown extensions
func SpecializedMarkdown(ctx *markup.RenderContext) *GlodmarkRender {
// TODO: it could use a pool to cache the renderers to reuse them with different contexts
// at the moment it is fast enough (see the benchmarks)
r := &GlodmarkRender{ctx: ctx}
r.goldmarkMarkdown = goldmark.New(
goldmark.WithExtensions(
extension.NewTable(extension.WithTableCellAlignMethod(extension.TableCellAlignAttribute)),
extension.Strikethrough,
extension.TaskList,
extension.DefinitionList,
common.FootnoteExtension,
highlighting.NewHighlighting(
highlighting.WithFormatOptions(
chromahtml.WithClasses(true),
chromahtml.PreventSurroundingPre(true),
),
highlighting.WithWrapperRenderer(r.highlightingRenderer),
),
math.NewExtension(&ctx.RenderInternal, math.Enabled(setting.Markdown.EnableMath)),
meta.Meta,
),
goldmark.WithParserOptions(
parser.WithAttribute(),
parser.WithAutoHeadingID(),
parser.WithASTTransformers(util.Prioritized(NewASTTransformer(&ctx.RenderInternal), 10000)),
),
goldmark.WithRendererOptions(html.WithUnsafe()),
)
// Override the original Tasklist renderer!
r.goldmarkMarkdown.Renderer().AddOptions(
renderer.WithNodeRenderers(util.Prioritized(NewHTMLRenderer(&ctx.RenderInternal), 10)),
)
return r
}
// render calls goldmark render to convert Markdown to HTML
// NOTE: The output of this method MUST get sanitized separately!!!
func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
converter := SpecializedMarkdown(ctx)
lw := &limitWriter{
w: output,
limit: setting.UI.MaxDisplayFileSize * 3,
@ -160,8 +164,8 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer)
}
log.Warn("Unable to render markdown due to panic in goldmark: %v", err)
if log.IsDebug() {
log.Debug("Panic in markdown: %v\n%s", err, log.Stack(2))
if (!setting.IsProd && !setting.IsInTesting) || log.IsDebug() {
log.Error("Panic in markdown: %v\n%s", err, log.Stack(2))
}
}()
@ -200,26 +204,6 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer)
return nil
}
// Note: The output of this method must get sanitized.
func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
defer func() {
err := recover()
if err == nil {
return
}
log.Warn("Unable to render markdown due to panic in goldmark - will return raw bytes")
if log.IsDebug() {
log.Debug("Panic in markdown: %v\n%s", err, log.Stack(2))
}
_, err = io.Copy(output, input)
if err != nil {
log.Error("io.Copy failed: %v", err)
}
}()
return actualRender(ctx, input, output)
}
// MarkupName describes markup's name
var MarkupName = "markdown"

View file

@ -1051,3 +1051,17 @@ func TestAttention(t *testing.T) {
// legacy GitHub style
test(`> **warning**`, renderAttention("warning", "octicon-alert")+"\n</blockquote>")
}
func BenchmarkSpecializedMarkdown(b *testing.B) {
// 240856 4719 ns/op
for i := 0; i < b.N; i++ {
markdown.SpecializedMarkdown(&markup.RenderContext{})
}
}
func BenchmarkMarkdownRender(b *testing.B) {
// 23202 50840 ns/op
for i := 0; i < b.N; i++ {
_, _ = markdown.RenderString(&markup.RenderContext{Ctx: context.Background()}, "https://example.com\n- a\n- b\n")
}
}

View file

@ -4,17 +4,21 @@
package math
import (
"code.gitea.io/gitea/modules/markup/internal"
gast "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/renderer"
"github.com/yuin/goldmark/util"
)
// BlockRenderer represents a renderer for math Blocks
type BlockRenderer struct{}
type BlockRenderer struct {
renderInternal *internal.RenderInternal
}
// NewBlockRenderer creates a new renderer for math Blocks
func NewBlockRenderer() renderer.NodeRenderer {
return &BlockRenderer{}
func NewBlockRenderer(renderInternal *internal.RenderInternal) renderer.NodeRenderer {
return &BlockRenderer{renderInternal: renderInternal}
}
// RegisterFuncs registers the renderer for math Blocks
@ -33,7 +37,7 @@ func (r *BlockRenderer) writeLines(w util.BufWriter, source []byte, n gast.Node)
func (r *BlockRenderer) renderBlock(w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
n := node.(*Block)
if entering {
_, _ = w.WriteString(`<pre class="code-block is-loading"><code class="chroma language-math display">`)
_ = r.renderInternal.FormatWithSafeAttrs(w, `<pre class="code-block is-loading"><code class="chroma language-math display">`)
r.writeLines(w, source, n)
} else {
_, _ = w.WriteString(`</code></pre>` + "\n")

View file

@ -6,17 +6,21 @@ package math
import (
"bytes"
"code.gitea.io/gitea/modules/markup/internal"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/renderer"
"github.com/yuin/goldmark/util"
)
// InlineRenderer is an inline renderer
type InlineRenderer struct{}
type InlineRenderer struct {
renderInternal *internal.RenderInternal
}
// NewInlineRenderer returns a new renderer for inline math
func NewInlineRenderer() renderer.NodeRenderer {
return &InlineRenderer{}
func NewInlineRenderer(renderInternal *internal.RenderInternal) renderer.NodeRenderer {
return &InlineRenderer{renderInternal: renderInternal}
}
func (r *InlineRenderer) renderInline(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
@ -25,7 +29,7 @@ func (r *InlineRenderer) renderInline(w util.BufWriter, source []byte, n ast.Nod
if _, ok := n.(*InlineBlock); ok {
extraClass = "display "
}
_, _ = w.WriteString(`<code class="language-math ` + extraClass + `is-loading">`)
_ = r.renderInternal.FormatWithSafeAttrs(w, `<code class="language-math %sis-loading">`, extraClass)
for c := n.FirstChild(); c != nil; c = c.NextSibling() {
segment := c.(*ast.Text).Segment
value := util.EscapeHTML(segment.Value(source))

View file

@ -4,6 +4,8 @@
package math
import (
"code.gitea.io/gitea/modules/markup/internal"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer"
@ -12,6 +14,7 @@ import (
// Extension is a math extension
type Extension struct {
renderInternal *internal.RenderInternal
enabled bool
parseDollarInline bool
parseDollarBlock bool
@ -39,38 +42,10 @@ func Enabled(enable ...bool) Option {
})
}
// WithInlineDollarParser enables or disables the parsing of $...$
func WithInlineDollarParser(enable ...bool) Option {
value := true
if len(enable) > 0 {
value = enable[0]
}
return extensionFunc(func(e *Extension) {
e.parseDollarInline = value
})
}
// WithBlockDollarParser enables or disables the parsing of $$...$$
func WithBlockDollarParser(enable ...bool) Option {
value := true
if len(enable) > 0 {
value = enable[0]
}
return extensionFunc(func(e *Extension) {
e.parseDollarBlock = value
})
}
// Math represents a math extension with default rendered delimiters
var Math = &Extension{
enabled: true,
parseDollarBlock: true,
parseDollarInline: true,
}
// NewExtension creates a new math extension with the provided options
func NewExtension(opts ...Option) *Extension {
func NewExtension(renderInternal *internal.RenderInternal, opts ...Option) *Extension {
r := &Extension{
renderInternal: renderInternal,
enabled: true,
parseDollarBlock: true,
parseDollarInline: true,
@ -102,7 +77,7 @@ func (e *Extension) Extend(m goldmark.Markdown) {
m.Parser().AddOptions(parser.WithInlineParsers(inlines...))
m.Renderer().AddOptions(renderer.WithNodeRenderers(
util.Prioritized(NewBlockRenderer(), 501),
util.Prioritized(NewInlineRenderer(), 502),
util.Prioritized(NewBlockRenderer(e.renderInternal), 501),
util.Prioritized(NewInlineRenderer(e.renderInternal), 502),
))
}

View file

@ -11,10 +11,8 @@ import (
"github.com/stretchr/testify/assert"
)
/*
IssueTemplate is a legacy to keep the unit tests working.
Copied from structs.IssueTemplate, the original type has been changed a lot to support yaml template.
*/
// IssueTemplate is a legacy to keep the unit tests working.
// Copied from structs.IssueTemplate, the original type has been changed a lot to support yaml template.
type IssueTemplate struct {
Name string `json:"name" yaml:"name"`
Title string `json:"title" yaml:"title"`

View file

@ -32,7 +32,8 @@ func (r *HTMLRenderer) renderAttention(w util.BufWriter, source []byte, node ast
default: // including "note"
octiconName = "info"
}
_, _ = w.WriteString(string(svg.RenderHTML("octicon-"+octiconName, 16, "attention-icon attention-"+n.AttentionType)))
svgHTML := svg.RenderHTML("octicon-"+octiconName, 16, "attention-icon attention-"+n.AttentionType)
_, _ = w.WriteString(string(r.renderInternal.ProtectSafeAttrs(svgHTML)))
}
return ast.WalkContinue, nil
}
@ -128,13 +129,13 @@ func (g *ASTTransformer) transformBlockquote(v *ast.Blockquote, reader text.Read
}
// color the blockquote
v.SetAttributeString("class", []byte("attention-header attention-"+attentionType))
v.SetAttributeString(g.renderInternal.SafeAttr("class"), []byte(g.renderInternal.SafeValue("attention-header attention-"+attentionType)))
// create an emphasis to make it bold
attentionParagraph := ast.NewParagraph()
g.applyElementDir(attentionParagraph)
emphasis := ast.NewEmphasis(2)
emphasis.SetAttributeString("class", []byte("attention-"+attentionType))
emphasis.SetAttributeString(g.renderInternal.SafeAttr("class"), []byte(g.renderInternal.SafeValue("attention-"+attentionType)))
attentionAstString := ast.NewString([]byte(cases.Title(language.English).String(attentionType)))

View file

@ -5,7 +5,6 @@ package markdown
import (
"bytes"
"fmt"
"strings"
"code.gitea.io/gitea/modules/markup"
@ -40,7 +39,7 @@ func (r *HTMLRenderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Nod
r.Writer.RawWrite(w, value)
}
case *ColorPreview:
_, _ = w.WriteString(fmt.Sprintf(`<span class="color-preview" style="background-color: %v"></span>`, string(v.Color)))
_ = r.renderInternal.FormatWithSafeAttrs(w, `<span class="color-preview" style="background-color: %s"></span>`, string(v.Color))
}
}
return ast.WalkSkipChildren, nil

View file

@ -72,7 +72,7 @@ func (g *ASTTransformer) transformList(_ *markup.RenderContext, v *ast.List, rc
}
newChild := NewTaskCheckBoxListItem(listItem)
newChild.IsChecked = taskCheckBox.IsChecked
newChild.SetAttributeString("class", []byte("task-list-item"))
newChild.SetAttributeString(g.renderInternal.SafeAttr("class"), []byte(g.renderInternal.SafeValue("task-list-item")))
segments := newChild.FirstChild().Lines()
if segments.Len() > 0 {
segment := segments.At(0)