Refactor markup rendering to accept general "protocol:" prefix (#29276)

Follow #29024

Major changes:

* refactor validLinksPattern to fullURLPattern and add comments, now it
accepts "protocol:" prefix
* rename `IsLink*` to `IsFullURL*`, and remove unnecessray "mailto:"
check
* fix some comments (by the way)
* rename EmojiShortCodeRegex -> emojiShortCodeRegex (by the way)
This commit is contained in:
wxiaoguang 2024-02-21 18:08:08 +08:00 committed by GitHub
parent 4e536edaea
commit 6130522aa8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 38 additions and 32 deletions

View file

@ -53,38 +53,38 @@ var (
// shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
// anySHA1Pattern splits url containing SHA into parts
// anyHashPattern splits url containing SHA into parts
anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`)
// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash"
comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`)
validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)
// fullURLPattern matches full URL like "mailto:...", "https://..." and "ssh+git://..."
fullURLPattern = regexp.MustCompile(`^[a-z][-+\w]+:`)
// While this email regex is definitely not perfect and I'm sure you can come up
// with edge cases, it is still accepted by the CommonMark specification, as
// well as the HTML5 spec:
// emailRegex is definitely not perfect with edge cases,
// it is still accepted by the CommonMark specification, as well as the HTML5 spec:
// http://spec.commonmark.org/0.28/#email-address
// https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|;|,|\\?|!|\\.(\\s|$))")
// blackfriday extensions create IDs like fn:user-content-footnote
// blackfridayExtRegex is for blackfriday extensions create IDs like fn:user-content-footnote
blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)
// EmojiShortCodeRegex find emoji by alias like :smile:
EmojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)
// emojiShortCodeRegex find emoji by alias like :smile:
emojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)
)
// CSS class for action keywords (e.g. "closes: #1")
const keywordClass = "issue-keyword"
// IsLink reports whether link fits valid format.
func IsLink(link []byte) bool {
return validLinksPattern.Match(link)
// IsFullURLBytes reports whether link fits valid format.
func IsFullURLBytes(link []byte) bool {
return fullURLPattern.Match(link)
}
func IsLinkStr(link string) bool {
return validLinksPattern.MatchString(link)
func IsFullURLString(link string) bool {
return fullURLPattern.MatchString(link)
}
// regexp for full links to issues/pulls
@ -399,7 +399,7 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) {
if attr.Key != "src" {
continue
}
if len(attr.Val) > 0 && !IsLinkStr(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") {
if len(attr.Val) > 0 && !IsFullURLString(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") {
attr.Val = util.URLJoin(ctx.Links.ResolveMediaLink(ctx.IsWiki), attr.Val)
}
attr.Val = camoHandleLink(attr.Val)
@ -650,7 +650,7 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
if equalPos := strings.IndexByte(v, '='); equalPos == -1 {
// There is no equal in this argument; this is a mandatory arg
if props["name"] == "" {
if IsLinkStr(v) {
if IsFullURLString(v) {
// If we clearly see it is a link, we save it so
// But first we need to ensure, that if both mandatory args provided
@ -725,7 +725,7 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
DataAtom: atom.A,
}
childNode.Parent = linkNode
absoluteLink := IsLinkStr(link)
absoluteLink := IsFullURLString(link)
if !absoluteLink {
if image {
link = strings.ReplaceAll(link, " ", "+")
@ -1059,7 +1059,7 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
start := 0
next := node.NextSibling
for node != nil && node != next && start < len(node.Data) {
m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:])
m := emojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:])
if m == nil {
return
}