Add sanitizer rules per renderer (#16110)
* Added sanitizer rules per renderer. * Updated documentation. Co-authored-by: techknowlogick <techknowlogick@gitea.io>
This commit is contained in:
		
							parent
							
								
									eb324a9402
								
							
						
					
					
						commit
						c9c7afda1a
					
				
					 10 changed files with 215 additions and 113 deletions
				
			
		|  | @ -907,13 +907,17 @@ Gitea supports customizing the sanitization policy for rendered HTML. The exampl | ||||||
| ELEMENT = span | ELEMENT = span | ||||||
| ALLOW_ATTR = class | ALLOW_ATTR = class | ||||||
| REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+ | REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+ | ||||||
|  | ALLOW_DATA_URI_IMAGES = true | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
|  - `ELEMENT`: The element this policy applies to. Must be non-empty. |  - `ELEMENT`: The element this policy applies to. Must be non-empty. | ||||||
|  - `ALLOW_ATTR`: The attribute this policy allows. Must be non-empty. |  - `ALLOW_ATTR`: The attribute this policy allows. Must be non-empty. | ||||||
|  - `REGEXP`: A regex to match the contents of the attribute against. Must be present but may be empty for unconditional whitelisting of this attribute. |  - `REGEXP`: A regex to match the contents of the attribute against. Must be present but may be empty for unconditional whitelisting of this attribute. | ||||||
|  |  - `ALLOW_DATA_URI_IMAGES`: **false**: Allow data URI images (`<img src="data:image/png;base64,..."/>`). | ||||||
| 
 | 
 | ||||||
| Multiple sanitisation rules can be defined by adding unique subsections, e.g. `[markup.sanitizer.TeX-2]`. | Multiple sanitisation rules can be defined by adding unique subsections, e.g. `[markup.sanitizer.TeX-2]`. | ||||||
|  | To apply a sanitisation rule only to a specific external renderer, the section name must include the renderer name, e.g. `[markup.sanitizer.asciidoc.rule-1]`. | ||||||
|  | If the rule is defined above the renderer ini section or the name does not match a renderer it is applied to every renderer. | ||||||
| 
 | 
 | ||||||
| ## Time (`time`) | ## Time (`time`) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -64,8 +64,8 @@ IS_INPUT_FILE = false | ||||||
| [markup.jupyter] | [markup.jupyter] | ||||||
| ENABLED = true | ENABLED = true | ||||||
| FILE_EXTENSIONS = .ipynb | FILE_EXTENSIONS = .ipynb | ||||||
| RENDER_COMMAND = "jupyter nbconvert --stdout --to html --template basic " | RENDER_COMMAND = "jupyter nbconvert --stdin --stdout --to html --template basic" | ||||||
| IS_INPUT_FILE = true | IS_INPUT_FILE = false | ||||||
| 
 | 
 | ||||||
| [markup.restructuredtext] | [markup.restructuredtext] | ||||||
| ENABLED = true | ENABLED = true | ||||||
|  | @ -90,15 +90,50 @@ FILE_EXTENSIONS = .md,.markdown | ||||||
| RENDER_COMMAND  = pandoc -f markdown -t html --katex | RENDER_COMMAND  = pandoc -f markdown -t html --katex | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| You must define `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` in each section. | You must define `ELEMENT` and `ALLOW_ATTR` in each section. | ||||||
| 
 | 
 | ||||||
| To define multiple entries, add a unique alphanumeric suffix (e.g., `[markup.sanitizer.1]` and `[markup.sanitizer.something]`). | To define multiple entries, add a unique alphanumeric suffix (e.g., `[markup.sanitizer.1]` and `[markup.sanitizer.something]`). | ||||||
| 
 | 
 | ||||||
|  | To apply a sanitisation rule only to a specific external renderer, the section name must include the renderer name, e.g. `[markup.sanitizer.asciidoc.rule-1]`, `[markup.sanitizer.<renderer>.rule-1]`. | ||||||
|  | 
 | ||||||
|  | **Note**: If the rule is defined above the renderer ini section or the name does not match a renderer it is applied to every renderer. | ||||||
|  | 
 | ||||||
| Once your configuration changes have been made, restart Gitea to have changes take effect. | Once your configuration changes have been made, restart Gitea to have changes take effect. | ||||||
| 
 | 
 | ||||||
| **Note**: Prior to Gitea 1.12 there was a single `markup.sanitiser` section with keys that were redefined for multiple rules, however, | **Note**: Prior to Gitea 1.12 there was a single `markup.sanitiser` section with keys that were redefined for multiple rules, however, | ||||||
| there were significant problems with this method of configuration necessitating configuration through multiple sections. | there were significant problems with this method of configuration necessitating configuration through multiple sections. | ||||||
| 
 | 
 | ||||||
|  | ### Example: Office DOCX | ||||||
|  | 
 | ||||||
|  | Display Office DOCX files with [`pandoc`](https://pandoc.org/): | ||||||
|  | ```ini | ||||||
|  | [markup.docx] | ||||||
|  | ENABLED = true | ||||||
|  | FILE_EXTENSIONS = .docx | ||||||
|  | RENDER_COMMAND = "pandoc --from docx --to html --self-contained --template /path/to/basic.html" | ||||||
|  | 
 | ||||||
|  | [markup.sanitizer.docx.img] | ||||||
|  | ALLOW_DATA_URI_IMAGES = true | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | The template file has the following content: | ||||||
|  | ``` | ||||||
|  | $body$ | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | ### Example: Jupyter Notebook | ||||||
|  | 
 | ||||||
|  | Display Jupyter Notebook files with [`nbconvert`](https://github.com/jupyter/nbconvert): | ||||||
|  | ```ini | ||||||
|  | [markup.jupyter] | ||||||
|  | ENABLED = true | ||||||
|  | FILE_EXTENSIONS = .ipynb | ||||||
|  | RENDER_COMMAND = "jupyter-nbconvert --stdin --stdout --to html --template basic" | ||||||
|  | 
 | ||||||
|  | [markup.sanitizer.jupyter.img] | ||||||
|  | ALLOW_DATA_URI_IMAGES = true | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
| ## Customizing CSS | ## Customizing CSS | ||||||
| The external renderer is specified in the .ini in the format `[markup.XXXXX]` and the HTML supplied by your external renderer will be wrapped in a `<div>` with classes `markup` and `XXXXX`. The `markup` class provides out of the box styling (as does `markdown` if `XXXXX` is `markdown`). Otherwise you can use these classes to specifically target the contents of your rendered HTML.  | The external renderer is specified in the .ini in the format `[markup.XXXXX]` and the HTML supplied by your external renderer will be wrapped in a `<div>` with classes `markup` and `XXXXX`. The `markup` class provides out of the box styling (as does `markdown` if `XXXXX` is `markdown`). Otherwise you can use these classes to specifically target the contents of your rendered HTML.  | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -10,6 +10,7 @@ import ( | ||||||
| 	"html" | 	"html" | ||||||
| 	"io" | 	"io" | ||||||
| 	"io/ioutil" | 	"io/ioutil" | ||||||
|  | 	"regexp" | ||||||
| 	"strconv" | 	"strconv" | ||||||
| 
 | 
 | ||||||
| 	"code.gitea.io/gitea/modules/csv" | 	"code.gitea.io/gitea/modules/csv" | ||||||
|  | @ -38,6 +39,15 @@ func (Renderer) Extensions() []string { | ||||||
| 	return []string{".csv", ".tsv"} | 	return []string{".csv", ".tsv"} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | // SanitizerRules implements markup.Renderer | ||||||
|  | func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { | ||||||
|  | 	return []setting.MarkupSanitizerRule{ | ||||||
|  | 		{Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)}, | ||||||
|  | 		{Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, | ||||||
|  | 		{Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
| func writeField(w io.Writer, element, class, field string) error { | func writeField(w io.Writer, element, class, field string) error { | ||||||
| 	if _, err := io.WriteString(w, "<"); err != nil { | 	if _, err := io.WriteString(w, "<"); err != nil { | ||||||
| 		return err | 		return err | ||||||
|  |  | ||||||
							
								
								
									
										7
									
								
								modules/markup/external/external.go
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										7
									
								
								modules/markup/external/external.go
									
										
									
									
										vendored
									
									
								
							|  | @ -30,7 +30,7 @@ func RegisterRenderers() { | ||||||
| 
 | 
 | ||||||
| // Renderer implements markup.Renderer for external tools | // Renderer implements markup.Renderer for external tools | ||||||
| type Renderer struct { | type Renderer struct { | ||||||
| 	setting.MarkupRenderer | 	*setting.MarkupRenderer | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Name returns the external tool name | // Name returns the external tool name | ||||||
|  | @ -48,6 +48,11 @@ func (p *Renderer) Extensions() []string { | ||||||
| 	return p.FileExtensions | 	return p.FileExtensions | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | // SanitizerRules implements markup.Renderer | ||||||
|  | func (p *Renderer) SanitizerRules() []setting.MarkupSanitizerRule { | ||||||
|  | 	return p.MarkupSanitizerRules | ||||||
|  | } | ||||||
|  | 
 | ||||||
| func envMark(envName string) string { | func envMark(envName string) string { | ||||||
| 	if runtime.GOOS == "windows" { | 	if runtime.GOOS == "windows" { | ||||||
| 		return "%" + envName + "%" | 		return "%" + envName + "%" | ||||||
|  |  | ||||||
|  | @ -112,7 +112,7 @@ func TestRender_links(t *testing.T) { | ||||||
| 
 | 
 | ||||||
| 	defaultCustom := setting.Markdown.CustomURLSchemes | 	defaultCustom := setting.Markdown.CustomURLSchemes | ||||||
| 	setting.Markdown.CustomURLSchemes = []string{"ftp", "magnet"} | 	setting.Markdown.CustomURLSchemes = []string{"ftp", "magnet"} | ||||||
| 	ReplaceSanitizer() | 	InitializeSanitizer() | ||||||
| 	CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes) | 	CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes) | ||||||
| 
 | 
 | ||||||
| 	test( | 	test( | ||||||
|  | @ -192,7 +192,7 @@ func TestRender_links(t *testing.T) { | ||||||
| 
 | 
 | ||||||
| 	// Restore previous settings | 	// Restore previous settings | ||||||
| 	setting.Markdown.CustomURLSchemes = defaultCustom | 	setting.Markdown.CustomURLSchemes = defaultCustom | ||||||
| 	ReplaceSanitizer() | 	InitializeSanitizer() | ||||||
| 	CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes) | 	CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -199,7 +199,7 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer) | ||||||
| 		} | 		} | ||||||
| 		_ = lw.Close() | 		_ = lw.Close() | ||||||
| 	}() | 	}() | ||||||
| 	buf := markup.SanitizeReader(rd) | 	buf := markup.SanitizeReader(rd, "") | ||||||
| 	_, err := io.Copy(output, buf) | 	_, err := io.Copy(output, buf) | ||||||
| 	return err | 	return err | ||||||
| } | } | ||||||
|  | @ -215,7 +215,7 @@ func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error | ||||||
| 		if log.IsDebug() { | 		if log.IsDebug() { | ||||||
| 			log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2))) | 			log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2))) | ||||||
| 		} | 		} | ||||||
| 		ret := markup.SanitizeReader(input) | 		ret := markup.SanitizeReader(input, "") | ||||||
| 		_, err = io.Copy(output, ret) | 		_, err = io.Copy(output, ret) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			log.Error("SanitizeReader failed: %v", err) | 			log.Error("SanitizeReader failed: %v", err) | ||||||
|  | @ -249,6 +249,11 @@ func (Renderer) Extensions() []string { | ||||||
| 	return setting.Markdown.FileExtensions | 	return setting.Markdown.FileExtensions | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | // SanitizerRules implements markup.Renderer | ||||||
|  | func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { | ||||||
|  | 	return []setting.MarkupSanitizerRule{} | ||||||
|  | } | ||||||
|  | 
 | ||||||
| // Render implements markup.Renderer | // Render implements markup.Renderer | ||||||
| func (Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { | func (Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { | ||||||
| 	return render(ctx, input, output) | 	return render(ctx, input, output) | ||||||
|  |  | ||||||
|  | @ -13,6 +13,7 @@ import ( | ||||||
| 
 | 
 | ||||||
| 	"code.gitea.io/gitea/modules/highlight" | 	"code.gitea.io/gitea/modules/highlight" | ||||||
| 	"code.gitea.io/gitea/modules/markup" | 	"code.gitea.io/gitea/modules/markup" | ||||||
|  | 	"code.gitea.io/gitea/modules/setting" | ||||||
| 	"code.gitea.io/gitea/modules/util" | 	"code.gitea.io/gitea/modules/util" | ||||||
| 
 | 
 | ||||||
| 	"github.com/alecthomas/chroma" | 	"github.com/alecthomas/chroma" | ||||||
|  | @ -41,6 +42,11 @@ func (Renderer) Extensions() []string { | ||||||
| 	return []string{".org"} | 	return []string{".org"} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | // SanitizerRules implements markup.Renderer | ||||||
|  | func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { | ||||||
|  | 	return []setting.MarkupSanitizerRule{} | ||||||
|  | } | ||||||
|  | 
 | ||||||
| // Render renders orgmode rawbytes to HTML | // Render renders orgmode rawbytes to HTML | ||||||
| func Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { | func Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { | ||||||
| 	htmlWriter := org.NewHTMLWriter() | 	htmlWriter := org.NewHTMLWriter() | ||||||
|  |  | ||||||
|  | @ -81,6 +81,7 @@ type Renderer interface { | ||||||
| 	Name() string // markup format name | 	Name() string // markup format name | ||||||
| 	Extensions() []string | 	Extensions() []string | ||||||
| 	NeedPostProcess() bool | 	NeedPostProcess() bool | ||||||
|  | 	SanitizerRules() []setting.MarkupSanitizerRule | ||||||
| 	Render(ctx *RenderContext, input io.Reader, output io.Writer) error | 	Render(ctx *RenderContext, input io.Reader, output io.Writer) error | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -136,37 +137,32 @@ func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Wr | ||||||
| 		_ = pw.Close() | 		_ = pw.Close() | ||||||
| 	}() | 	}() | ||||||
| 
 | 
 | ||||||
| 	if renderer.NeedPostProcess() { | 	pr2, pw2 := io.Pipe() | ||||||
| 		pr2, pw2 := io.Pipe() | 	defer func() { | ||||||
| 		defer func() { | 		_ = pr2.Close() | ||||||
| 			_ = pr2.Close() | 		_ = pw2.Close() | ||||||
| 			_ = pw2.Close() | 	}() | ||||||
| 		}() |  | ||||||
| 
 | 
 | ||||||
| 		wg.Add(1) | 	wg.Add(1) | ||||||
| 		go func() { | 	go func() { | ||||||
| 			buf := SanitizeReader(pr2) | 		buf := SanitizeReader(pr2, renderer.Name()) | ||||||
| 			_, err = io.Copy(output, buf) | 		_, err = io.Copy(output, buf) | ||||||
| 			_ = pr2.Close() | 		_ = pr2.Close() | ||||||
| 			wg.Done() | 		wg.Done() | ||||||
| 		}() | 	}() | ||||||
| 
 | 
 | ||||||
| 		wg.Add(1) | 	wg.Add(1) | ||||||
| 		go func() { | 	go func() { | ||||||
|  | 		if renderer.NeedPostProcess() { | ||||||
| 			err = PostProcess(ctx, pr, pw2) | 			err = PostProcess(ctx, pr, pw2) | ||||||
| 			_ = pr.Close() | 		} else { | ||||||
| 			_ = pw2.Close() | 			_, err = io.Copy(pw2, pr) | ||||||
| 			wg.Done() | 		} | ||||||
| 		}() | 		_ = pr.Close() | ||||||
| 	} else { | 		_ = pw2.Close() | ||||||
| 		wg.Add(1) | 		wg.Done() | ||||||
| 		go func() { | 	}() | ||||||
| 			buf := SanitizeReader(pr) | 
 | ||||||
| 			_, err = io.Copy(output, buf) |  | ||||||
| 			_ = pr.Close() |  | ||||||
| 			wg.Done() |  | ||||||
| 		}() |  | ||||||
| 	} |  | ||||||
| 	if err1 := renderer.Render(ctx, input, pw); err1 != nil { | 	if err1 := renderer.Render(ctx, input, pw); err1 != nil { | ||||||
| 		return err1 | 		return err1 | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -19,8 +19,9 @@ import ( | ||||||
| // Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow | // Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow | ||||||
| // any modification to the underlying policies once it's been created. | // any modification to the underlying policies once it's been created. | ||||||
| type Sanitizer struct { | type Sanitizer struct { | ||||||
| 	policy *bluemonday.Policy | 	defaultPolicy    *bluemonday.Policy | ||||||
| 	init   sync.Once | 	rendererPolicies map[string]*bluemonday.Policy | ||||||
|  | 	init             sync.Once | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| var sanitizer = &Sanitizer{} | var sanitizer = &Sanitizer{} | ||||||
|  | @ -30,47 +31,57 @@ var sanitizer = &Sanitizer{} | ||||||
| // entire application lifecycle. | // entire application lifecycle. | ||||||
| func NewSanitizer() { | func NewSanitizer() { | ||||||
| 	sanitizer.init.Do(func() { | 	sanitizer.init.Do(func() { | ||||||
| 		ReplaceSanitizer() | 		InitializeSanitizer() | ||||||
| 	}) | 	}) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // ReplaceSanitizer replaces the current sanitizer to account for changes in settings | // InitializeSanitizer (re)initializes the current sanitizer to account for changes in settings | ||||||
| func ReplaceSanitizer() { | func InitializeSanitizer() { | ||||||
| 	sanitizer.policy = bluemonday.UGCPolicy() | 	sanitizer.rendererPolicies = map[string]*bluemonday.Policy{} | ||||||
|  | 	sanitizer.defaultPolicy = createDefaultPolicy() | ||||||
|  | 
 | ||||||
|  | 	for name, renderer := range renderers { | ||||||
|  | 		sanitizerRules := renderer.SanitizerRules() | ||||||
|  | 		if len(sanitizerRules) > 0 { | ||||||
|  | 			policy := createDefaultPolicy() | ||||||
|  | 			addSanitizerRules(policy, sanitizerRules) | ||||||
|  | 			sanitizer.rendererPolicies[name] = policy | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func createDefaultPolicy() *bluemonday.Policy { | ||||||
|  | 	policy := bluemonday.UGCPolicy() | ||||||
| 	// For Chroma markdown plugin | 	// For Chroma markdown plugin | ||||||
| 	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^is-loading$`)).OnElements("pre") | 	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^is-loading$`)).OnElements("pre") | ||||||
| 	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+$`)).OnElements("code") | 	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+$`)).OnElements("code") | ||||||
| 
 | 
 | ||||||
| 	// Checkboxes | 	// Checkboxes | ||||||
| 	sanitizer.policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") | 	policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") | ||||||
| 	sanitizer.policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input") | 	policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input") | ||||||
| 
 | 
 | ||||||
| 	// Custom URL-Schemes | 	// Custom URL-Schemes | ||||||
| 	if len(setting.Markdown.CustomURLSchemes) > 0 { | 	if len(setting.Markdown.CustomURLSchemes) > 0 { | ||||||
| 		sanitizer.policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...) | 		policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	// Allow classes for anchors | 	// Allow classes for anchors | ||||||
| 	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue`)).OnElements("a") | 	policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue`)).OnElements("a") | ||||||
| 
 | 
 | ||||||
| 	// Allow classes for task lists | 	// Allow classes for task lists | ||||||
| 	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li") | 	policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li") | ||||||
| 
 | 
 | ||||||
| 	// Allow icons | 	// Allow icons | ||||||
| 	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i") | 	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i") | ||||||
| 
 | 
 | ||||||
| 	// Allow unlabelled labels | 	// Allow unlabelled labels | ||||||
| 	sanitizer.policy.AllowNoAttrs().OnElements("label") | 	policy.AllowNoAttrs().OnElements("label") | ||||||
| 
 | 
 | ||||||
| 	// Allow classes for emojis | 	// Allow classes for emojis | ||||||
| 	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img") | 	policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img") | ||||||
| 
 | 
 | ||||||
| 	// Allow icons, emojis, chroma syntax and keyword markup on span | 	// Allow icons, emojis, chroma syntax and keyword markup on span | ||||||
| 	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span") | 	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span") | ||||||
| 
 |  | ||||||
| 	// Allow data tables |  | ||||||
| 	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`data-table`)).OnElements("table") |  | ||||||
| 	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`line-num`)).OnElements("th", "td") |  | ||||||
| 
 | 
 | ||||||
| 	// Allow generally safe attributes | 	// Allow generally safe attributes | ||||||
| 	generalSafeAttrs := []string{"abbr", "accept", "accept-charset", | 	generalSafeAttrs := []string{"abbr", "accept", "accept-charset", | ||||||
|  | @ -101,18 +112,29 @@ func ReplaceSanitizer() { | ||||||
| 		"abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "wbr", | 		"abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "wbr", | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	sanitizer.policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...) | 	policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...) | ||||||
| 
 | 
 | ||||||
| 	sanitizer.policy.AllowAttrs("itemscope", "itemtype").OnElements("div") | 	policy.AllowAttrs("itemscope", "itemtype").OnElements("div") | ||||||
| 
 | 
 | ||||||
| 	// FIXME: Need to handle longdesc in img but there is no easy way to do it | 	// FIXME: Need to handle longdesc in img but there is no easy way to do it | ||||||
| 
 | 
 | ||||||
| 	// Custom keyword markup | 	// Custom keyword markup | ||||||
| 	for _, rule := range setting.ExternalSanitizerRules { | 	addSanitizerRules(policy, setting.ExternalSanitizerRules) | ||||||
| 		if rule.Regexp != nil { | 
 | ||||||
| 			sanitizer.policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element) | 	return policy | ||||||
| 		} else { | } | ||||||
| 			sanitizer.policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element) | 
 | ||||||
|  | func addSanitizerRules(policy *bluemonday.Policy, rules []setting.MarkupSanitizerRule) { | ||||||
|  | 	for _, rule := range rules { | ||||||
|  | 		if rule.AllowDataURIImages { | ||||||
|  | 			policy.AllowDataURIImages() | ||||||
|  | 		} | ||||||
|  | 		if rule.Element != "" { | ||||||
|  | 			if rule.Regexp != nil { | ||||||
|  | 				policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element) | ||||||
|  | 			} else { | ||||||
|  | 				policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element) | ||||||
|  | 			} | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  | @ -120,11 +142,15 @@ func ReplaceSanitizer() { | ||||||
| // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. | // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. | ||||||
| func Sanitize(s string) string { | func Sanitize(s string) string { | ||||||
| 	NewSanitizer() | 	NewSanitizer() | ||||||
| 	return sanitizer.policy.Sanitize(s) | 	return sanitizer.defaultPolicy.Sanitize(s) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // SanitizeReader sanitizes a Reader | // SanitizeReader sanitizes a Reader | ||||||
| func SanitizeReader(r io.Reader) *bytes.Buffer { | func SanitizeReader(r io.Reader, renderer string) *bytes.Buffer { | ||||||
| 	NewSanitizer() | 	NewSanitizer() | ||||||
| 	return sanitizer.policy.SanitizeReader(r) | 	policy, exist := sanitizer.rendererPolicies[renderer] | ||||||
|  | 	if !exist { | ||||||
|  | 		policy = sanitizer.defaultPolicy | ||||||
|  | 	} | ||||||
|  | 	return policy.SanitizeReader(r) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -15,31 +15,34 @@ import ( | ||||||
| 
 | 
 | ||||||
| // ExternalMarkupRenderers represents the external markup renderers | // ExternalMarkupRenderers represents the external markup renderers | ||||||
| var ( | var ( | ||||||
| 	ExternalMarkupRenderers []MarkupRenderer | 	ExternalMarkupRenderers []*MarkupRenderer | ||||||
| 	ExternalSanitizerRules  []MarkupSanitizerRule | 	ExternalSanitizerRules  []MarkupSanitizerRule | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| // MarkupRenderer defines the external parser configured in ini | // MarkupRenderer defines the external parser configured in ini | ||||||
| type MarkupRenderer struct { | type MarkupRenderer struct { | ||||||
| 	Enabled         bool | 	Enabled              bool | ||||||
| 	MarkupName      string | 	MarkupName           string | ||||||
| 	Command         string | 	Command              string | ||||||
| 	FileExtensions  []string | 	FileExtensions       []string | ||||||
| 	IsInputFile     bool | 	IsInputFile          bool | ||||||
| 	NeedPostProcess bool | 	NeedPostProcess      bool | ||||||
|  | 	MarkupSanitizerRules []MarkupSanitizerRule | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // MarkupSanitizerRule defines the policy for whitelisting attributes on | // MarkupSanitizerRule defines the policy for whitelisting attributes on | ||||||
| // certain elements. | // certain elements. | ||||||
| type MarkupSanitizerRule struct { | type MarkupSanitizerRule struct { | ||||||
| 	Element   string | 	Element            string | ||||||
| 	AllowAttr string | 	AllowAttr          string | ||||||
| 	Regexp    *regexp.Regexp | 	Regexp             *regexp.Regexp | ||||||
|  | 	AllowDataURIImages bool | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func newMarkup() { | func newMarkup() { | ||||||
| 	ExternalMarkupRenderers = make([]MarkupRenderer, 0, 10) | 	ExternalMarkupRenderers = make([]*MarkupRenderer, 0, 10) | ||||||
| 	ExternalSanitizerRules = make([]MarkupSanitizerRule, 0, 10) | 	ExternalSanitizerRules = make([]MarkupSanitizerRule, 0, 10) | ||||||
|  | 
 | ||||||
| 	for _, sec := range Cfg.Section("markup").ChildSections() { | 	for _, sec := range Cfg.Section("markup").ChildSections() { | ||||||
| 		name := strings.TrimPrefix(sec.Name(), "markup.") | 		name := strings.TrimPrefix(sec.Name(), "markup.") | ||||||
| 		if name == "" { | 		if name == "" { | ||||||
|  | @ -56,50 +59,62 @@ func newMarkup() { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func newMarkupSanitizer(name string, sec *ini.Section) { | func newMarkupSanitizer(name string, sec *ini.Section) { | ||||||
| 	haveElement := sec.HasKey("ELEMENT") | 	rule, ok := createMarkupSanitizerRule(name, sec) | ||||||
| 	haveAttr := sec.HasKey("ALLOW_ATTR") | 	if ok { | ||||||
| 	haveRegexp := sec.HasKey("REGEXP") | 		if strings.HasPrefix(name, "sanitizer.") { | ||||||
|  | 			names := strings.SplitN(strings.TrimPrefix(name, "sanitizer."), ".", 2) | ||||||
|  | 			name = names[0] | ||||||
|  | 		} | ||||||
|  | 		for _, renderer := range ExternalMarkupRenderers { | ||||||
|  | 			if name == renderer.MarkupName { | ||||||
|  | 				renderer.MarkupSanitizerRules = append(renderer.MarkupSanitizerRules, rule) | ||||||
|  | 				return | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 		ExternalSanitizerRules = append(ExternalSanitizerRules, rule) | ||||||
|  | 	} | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| 	if !haveElement && !haveAttr && !haveRegexp { | func createMarkupSanitizerRule(name string, sec *ini.Section) (MarkupSanitizerRule, bool) { | ||||||
| 		log.Warn("Skipping empty section: markup.%s.", name) | 	var rule MarkupSanitizerRule | ||||||
| 		return | 
 | ||||||
|  | 	ok := false | ||||||
|  | 	if sec.HasKey("ALLOW_DATA_URI_IMAGES") { | ||||||
|  | 		rule.AllowDataURIImages = sec.Key("ALLOW_DATA_URI_IMAGES").MustBool(false) | ||||||
|  | 		ok = true | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if !haveElement || !haveAttr || !haveRegexp { | 	if sec.HasKey("ELEMENT") || sec.HasKey("ALLOW_ATTR") { | ||||||
| 		log.Error("Missing required keys from markup.%s. Must have all three of ELEMENT, ALLOW_ATTR, and REGEXP defined!", name) | 		rule.Element = sec.Key("ELEMENT").Value() | ||||||
| 		return | 		rule.AllowAttr = sec.Key("ALLOW_ATTR").Value() | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	elements := sec.Key("ELEMENT").Value() | 		if rule.Element == "" || rule.AllowAttr == "" { | ||||||
| 	allowAttrs := sec.Key("ALLOW_ATTR").Value() | 			log.Error("Missing required values from markup.%s. Must have ELEMENT and ALLOW_ATTR defined!", name) | ||||||
| 	regexpStr := sec.Key("REGEXP").Value() | 			return rule, false | ||||||
| 
 |  | ||||||
| 	if regexpStr == "" { |  | ||||||
| 		rule := MarkupSanitizerRule{ |  | ||||||
| 			Element:   elements, |  | ||||||
| 			AllowAttr: allowAttrs, |  | ||||||
| 			Regexp:    nil, |  | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		ExternalSanitizerRules = append(ExternalSanitizerRules, rule) | 		regexpStr := sec.Key("REGEXP").Value() | ||||||
| 		return | 		if regexpStr != "" { | ||||||
|  | 			// Validate when parsing the config that this is a valid regular | ||||||
|  | 			// expression. Then we can use regexp.MustCompile(...) later. | ||||||
|  | 			compiled, err := regexp.Compile(regexpStr) | ||||||
|  | 			if err != nil { | ||||||
|  | 				log.Error("In markup.%s: REGEXP (%s) failed to compile: %v", name, regexpStr, err) | ||||||
|  | 				return rule, false | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			rule.Regexp = compiled | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		ok = true | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	// Validate when parsing the config that this is a valid regular | 	if !ok { | ||||||
| 	// expression. Then we can use regexp.MustCompile(...) later. | 		log.Error("Missing required keys from markup.%s. Must have ELEMENT and ALLOW_ATTR or ALLOW_DATA_URI_IMAGES defined!", name) | ||||||
| 	compiled, err := regexp.Compile(regexpStr) | 		return rule, false | ||||||
| 	if err != nil { |  | ||||||
| 		log.Error("In module.%s: REGEXP (%s) at definition %d failed to compile: %v", regexpStr, name, err) |  | ||||||
| 		return |  | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	rule := MarkupSanitizerRule{ | 	return rule, true | ||||||
| 		Element:   elements, |  | ||||||
| 		AllowAttr: allowAttrs, |  | ||||||
| 		Regexp:    compiled, |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	ExternalSanitizerRules = append(ExternalSanitizerRules, rule) |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func newMarkupRenderer(name string, sec *ini.Section) { | func newMarkupRenderer(name string, sec *ini.Section) { | ||||||
|  | @ -126,7 +141,7 @@ func newMarkupRenderer(name string, sec *ini.Section) { | ||||||
| 		return | 		return | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	ExternalMarkupRenderers = append(ExternalMarkupRenderers, MarkupRenderer{ | 	ExternalMarkupRenderers = append(ExternalMarkupRenderers, &MarkupRenderer{ | ||||||
| 		Enabled:         sec.Key("ENABLED").MustBool(false), | 		Enabled:         sec.Key("ENABLED").MustBool(false), | ||||||
| 		MarkupName:      name, | 		MarkupName:      name, | ||||||
| 		FileExtensions:  exts, | 		FileExtensions:  exts, | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 KN4CK3R
						KN4CK3R