Giteabot 2023-03-07 08:38:13 -05:00 committed by GitHub
parent ecae62837c
commit 10df304b2f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 49 additions and 17 deletions

View file

@ -4,6 +4,7 @@
package typesniffer
import (
"bytes"
"fmt"
"io"
"net/http"
@ -24,8 +25,9 @@ const (
)
var (
svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`)
svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`)
svgComment = regexp.MustCompile(`(?s)<!--.*?-->`)
svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
)
// SniffedType contains information about a blobs type.
@ -91,10 +93,17 @@ func DetectContentType(data []byte) SniffedType {
data = data[:sniffLen]
}
if (strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")) && svgTagRegex.Match(data) ||
strings.Contains(ct, "text/xml") && svgTagInXMLRegex.Match(data) {
// SVG is unsupported. https://github.com/golang/go/issues/15888
ct = SvgMimeType
// SVG is unsupported by http.DetectContentType, https://github.com/golang/go/issues/15888
detectByHTML := strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")
detectByXML := strings.Contains(ct, "text/xml")
if detectByHTML || detectByXML {
dataProcessed := svgComment.ReplaceAll(data, nil)
dataProcessed = bytes.TrimSpace(dataProcessed)
if detectByHTML && svgTagRegex.Match(dataProcessed) ||
detectByXML && svgTagInXMLRegex.Match(dataProcessed) {
ct = SvgMimeType
}
}
return SniffedType{ct}

View file

@ -28,7 +28,6 @@ func TestIsSvgImage(t *testing.T) {
assert.True(t, DetectContentType([]byte("<svg></svg>")).IsSvgImage())
assert.True(t, DetectContentType([]byte(" <svg></svg>")).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<svg width="100"></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte("<svg/>")).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?><svg></svg>`)).IsSvgImage())
assert.True(t, DetectContentType([]byte(`<!-- Comment -->
<svg></svg>`)).IsSvgImage())
@ -57,6 +56,10 @@ func TestIsSvgImage(t *testing.T) {
<!-- Multline
Comment -->
<svg></svg>`)).IsSvgImage())
// the DetectContentType should work for incomplete data, because only beginning bytes are used for detection
assert.True(t, DetectContentType([]byte(`<svg>....`)).IsSvgImage())
assert.False(t, DetectContentType([]byte{}).IsSvgImage())
assert.False(t, DetectContentType([]byte("svg")).IsSvgImage())
assert.False(t, DetectContentType([]byte("<svgfoo></svgfoo>")).IsSvgImage())
@ -68,6 +71,26 @@ func TestIsSvgImage(t *testing.T) {
assert.False(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- <svg></svg> inside comment -->
<foo></foo>`)).IsSvgImage())
assert.False(t, DetectContentType([]byte(`
<!-- comment1 -->
<div>
<!-- comment2 -->
<svg></svg>
</div>
`)).IsSvgImage())
assert.False(t, DetectContentType([]byte(`
<!-- comment1
-->
<div>
<!-- comment2
-->
<svg></svg>
</div>
`)).IsSvgImage())
assert.False(t, DetectContentType([]byte(`<html><body><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg></svg></body></html>`)).IsSvgImage())
assert.False(t, DetectContentType([]byte(`<html><body><?xml version="1.0" encoding="UTF-8"?><svg></svg></body></html>`)).IsSvgImage())
}
func TestIsPDF(t *testing.T) {