From 172229966c9c69305d7b6b9b69552346343fe270 Mon Sep 17 00:00:00 2001
From: zeripath <art27@cantab.net>
Date: Wed, 20 Jan 2021 15:10:50 +0000
Subject: [PATCH] Prevent panic on fuzzer provided string (#14405)

* Prevent panic on fuzzer provided string

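The fuzzer has found that passing a <body> tag that carries an attribute to
PostProcess causes a panic. This PR strips the wrapping <html> and <body>
nodes from the parsed tree before rendering, instead of slicing the tags out
of the rendered bytes, so they never reach the output.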

Signed-off-by: Andrew Thornton <art27@cantab.net>

* Placate lint

* Placate lint again

Signed-off-by: Andrew Thornton <art27@cantab.net>

* Minor cleanup

Signed-off-by: Andrew Thornton <art27@cantab.net>
---
 modules/markup/html.go      | 38 +++++++++++++++++++++++++++----------
 modules/markup/html_test.go | 25 ++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/modules/markup/html.go b/modules/markup/html.go
index 9e4b1a3d5d..67aec7371c 100644
--- a/modules/markup/html.go
+++ b/modules/markup/html.go
@@ -317,9 +317,6 @@ func RenderEmoji(
 	return ctx.postProcess(rawHTML)
 }
 
-var byteBodyTag = []byte("<body>")
-var byteBodyTagClosing = []byte("</body>")
-
 func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
 	if ctx.procs == nil {
 		ctx.procs = defaultProcessors
@@ -327,9 +324,9 @@ func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
 
 	// give a generous extra 50 bytes
 	res := make([]byte, 0, len(rawHTML)+50)
-	res = append(res, byteBodyTag...)
+	res = append(res, "<html><body>"...)
 	res = append(res, rawHTML...)
-	res = append(res, byteBodyTagClosing...)
+	res = append(res, "</body></html>"...)
 
 	// parse the HTML
 	nodes, err := html.ParseFragment(bytes.NewReader(res), nil)
@@ -341,6 +338,31 @@ func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
 		ctx.visitNode(node, true)
 	}
 
+	newNodes := make([]*html.Node, 0, len(nodes))
+
+	for _, node := range nodes {
+		if node.Data == "html" {
+			node = node.FirstChild
+			for node != nil && node.Data != "body" {
+				node = node.NextSibling
+			}
+		}
+		if node == nil {
+			continue
+		}
+		if node.Data == "body" {
+			child := node.FirstChild
+			for child != nil {
+				newNodes = append(newNodes, child)
+				child = child.NextSibling
+			}
+		} else {
+			newNodes = append(newNodes, node)
+		}
+	}
+
+	nodes = newNodes
+
 	// Create buffer in which the data will be placed again. We know that the
 	// length will be at least that of res; to spare a few alloc+copy, we
 	// reuse res, resetting its length to 0.
@@ -353,12 +375,8 @@ func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
 		}
 	}
 
-	// remove initial parts - because Render creates a whole HTML page.
-	res = buf.Bytes()
-	res = res[bytes.Index(res, byteBodyTag)+len(byteBodyTag) : bytes.LastIndex(res, byteBodyTagClosing)]
-
 	// Everything done successfully, return parsed data.
-	return res, nil
+	return buf.Bytes(), nil
 }
 
 func (ctx *postProcessCtx) visitNode(node *html.Node, visitText bool) {
diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go
index b04781489a..a78b936f87 100644
--- a/modules/markup/html_test.go
+++ b/modules/markup/html_test.go
@@ -383,3 +383,28 @@ func TestRender_ShortLinks(t *testing.T) {
 		`<p><a href="https://example.org" rel="nofollow">[[foobar]]</a></p>`,
 		`<p><a href="https://example.org" rel="nofollow">[[foobar]]</a></p>`)
 }
+
+func Test_ParseClusterFuzz(t *testing.T) {
+	setting.AppURL = AppURL
+	setting.AppSubURL = AppSubURL
+
+	var localMetas = map[string]string{
+		"user": "go-gitea",
+		"repo": "gitea",
+	}
+	// Fuzzer-found input with an attribute-bearing <bodY> tag; must not error or leak "<html".
+	data := "<A><maTH><tr><MN><bodY ÿ><temPlate></template><tH><tr></A><tH><d<bodY "
+
+	val, err := PostProcess([]byte(data), "https://example.com", localMetas, false)
+
+	assert.NoError(t, err)
+	assert.NotContains(t, string(val), "<html")
+	// Same input behind a DOCTYPE declaration; the same two expectations apply.
+	data = "<!DOCTYPE html>\n<A><maTH><tr><MN><bodY ÿ><temPlate></template><tH><tr></A><tH><d<bodY "
+
+	val, err = PostProcess([]byte(data), "https://example.com", localMetas, false)
+
+	assert.NoError(t, err)
+
+	assert.NotContains(t, string(val), "<html")
+}