Refactor markup render system (#32533)

Remove unmaintainable sanitizer rules. No need to add special "class"
regexp rules anymore, use RenderInternal.SafeAttr instead, more details
(and examples) are in the tests
This commit is contained in:
wxiaoguang 2024-11-18 13:25:42 +08:00 committed by GitHub
parent 4f879a00df
commit 8a20fba8eb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
42 changed files with 568 additions and 508 deletions

View file

@ -9,7 +9,6 @@ import (
"html/template"
"io"
"strings"
"sync"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markup"
@ -29,11 +28,6 @@ import (
"github.com/yuin/goldmark/util"
)
var (
specMarkdown goldmark.Markdown
specMarkdownOnce sync.Once
)
var (
renderContextKey = parser.NewContextKey()
renderConfigKey = parser.NewContextKey()
@ -68,85 +62,95 @@ func newParserContext(ctx *markup.RenderContext) parser.Context {
return pc
}
// SpecializedMarkdown sets up the Gitea specific markdown extensions
func SpecializedMarkdown() goldmark.Markdown {
specMarkdownOnce.Do(func() {
specMarkdown = goldmark.New(
goldmark.WithExtensions(
extension.NewTable(
extension.WithTableCellAlignMethod(extension.TableCellAlignAttribute)),
extension.Strikethrough,
extension.TaskList,
extension.DefinitionList,
common.FootnoteExtension,
highlighting.NewHighlighting(
highlighting.WithFormatOptions(
chromahtml.WithClasses(true),
chromahtml.PreventSurroundingPre(true),
),
highlighting.WithWrapperRenderer(func(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) {
if entering {
language, _ := c.Language()
if language == nil {
language = []byte("text")
}
type GlodmarkRender struct {
ctx *markup.RenderContext
languageStr := string(language)
preClasses := []string{"code-block"}
if languageStr == "mermaid" || languageStr == "math" {
preClasses = append(preClasses, "is-loading")
}
_, err := w.WriteString(`<pre class="` + strings.Join(preClasses, " ") + `">`)
if err != nil {
return
}
// include language-x class as part of commonmark spec
// the "display" class is used by "js/markup/math.js" to render the code element as a block
_, err = w.WriteString(`<code class="chroma language-` + string(language) + ` display">`)
if err != nil {
return
}
} else {
_, err := w.WriteString("</code></pre>")
if err != nil {
return
}
}
}),
),
math.NewExtension(
math.Enabled(setting.Markdown.EnableMath),
),
meta.Meta,
),
goldmark.WithParserOptions(
parser.WithAttribute(),
parser.WithAutoHeadingID(),
parser.WithASTTransformers(
util.Prioritized(NewASTTransformer(), 10000),
),
),
goldmark.WithRendererOptions(
html.WithUnsafe(),
),
)
// Override the original Tasklist renderer!
specMarkdown.Renderer().AddOptions(
renderer.WithNodeRenderers(
util.Prioritized(NewHTMLRenderer(), 10),
),
)
})
return specMarkdown
goldmarkMarkdown goldmark.Markdown
}
// actualRender renders Markdown to HTML without handling special links.
func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
converter := SpecializedMarkdown()
func (r *GlodmarkRender) Convert(source []byte, writer io.Writer, opts ...parser.ParseOption) error {
return r.goldmarkMarkdown.Convert(source, writer, opts...)
}
func (r *GlodmarkRender) Renderer() renderer.Renderer {
return r.goldmarkMarkdown.Renderer()
}
func (r *GlodmarkRender) highlightingRenderer(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) {
if entering {
language, _ := c.Language()
if language == nil {
language = []byte("text")
}
languageStr := string(language)
preClasses := []string{"code-block"}
if languageStr == "mermaid" || languageStr == "math" {
preClasses = append(preClasses, "is-loading")
}
err := r.ctx.RenderInternal.FormatWithSafeAttrs(w, `<pre class="%s">`, strings.Join(preClasses, " "))
if err != nil {
return
}
// include language-x class as part of commonmark spec
// the "display" class is used by "js/markup/math.js" to render the code element as a block
err = r.ctx.RenderInternal.FormatWithSafeAttrs(w, `<code class="chroma language-%s display">`, string(language))
if err != nil {
return
}
} else {
_, err := w.WriteString("</code></pre>")
if err != nil {
return
}
}
}
// SpecializedMarkdown sets up the Gitea specific markdown extensions
func SpecializedMarkdown(ctx *markup.RenderContext) *GlodmarkRender {
// TODO: it could use a pool to cache the renderers to reuse them with different contexts
// at the moment it is fast enough (see the benchmarks)
r := &GlodmarkRender{ctx: ctx}
r.goldmarkMarkdown = goldmark.New(
goldmark.WithExtensions(
extension.NewTable(extension.WithTableCellAlignMethod(extension.TableCellAlignAttribute)),
extension.Strikethrough,
extension.TaskList,
extension.DefinitionList,
common.FootnoteExtension,
highlighting.NewHighlighting(
highlighting.WithFormatOptions(
chromahtml.WithClasses(true),
chromahtml.PreventSurroundingPre(true),
),
highlighting.WithWrapperRenderer(r.highlightingRenderer),
),
math.NewExtension(&ctx.RenderInternal, math.Enabled(setting.Markdown.EnableMath)),
meta.Meta,
),
goldmark.WithParserOptions(
parser.WithAttribute(),
parser.WithAutoHeadingID(),
parser.WithASTTransformers(util.Prioritized(NewASTTransformer(&ctx.RenderInternal), 10000)),
),
goldmark.WithRendererOptions(html.WithUnsafe()),
)
// Override the original Tasklist renderer!
r.goldmarkMarkdown.Renderer().AddOptions(
renderer.WithNodeRenderers(util.Prioritized(NewHTMLRenderer(&ctx.RenderInternal), 10)),
)
return r
}
// render calls goldmark render to convert Markdown to HTML
// NOTE: The output of this method MUST get sanitized separately!!!
func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
converter := SpecializedMarkdown(ctx)
lw := &limitWriter{
w: output,
limit: setting.UI.MaxDisplayFileSize * 3,
@ -160,8 +164,8 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer)
}
log.Warn("Unable to render markdown due to panic in goldmark: %v", err)
if log.IsDebug() {
log.Debug("Panic in markdown: %v\n%s", err, log.Stack(2))
if (!setting.IsProd && !setting.IsInTesting) || log.IsDebug() {
log.Error("Panic in markdown: %v\n%s", err, log.Stack(2))
}
}()
@ -200,26 +204,6 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer)
return nil
}
// Note: The output of this method must get sanitized.
func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
defer func() {
err := recover()
if err == nil {
return
}
log.Warn("Unable to render markdown due to panic in goldmark - will return raw bytes")
if log.IsDebug() {
log.Debug("Panic in markdown: %v\n%s", err, log.Stack(2))
}
_, err = io.Copy(output, input)
if err != nil {
log.Error("io.Copy failed: %v", err)
}
}()
return actualRender(ctx, input, output)
}
// MarkupName describes markup's name
var MarkupName = "markdown"