Replacing html-pipeline with Loofah for HTML Filtering (#42737).

Patch by Takashi Kato (user:tohosaku).



git-svn-id: https://svn.redmine.org/redmine/trunk@24094 e93f8b46-1217-0410-a6f0-8f06a7374b81
This commit is contained in:
Marius Balteanu
2025-10-31 06:38:27 +00:00
parent 19927b2382
commit d89a3b5e6f
17 changed files with 214 additions and 137 deletions

View File

@@ -37,9 +37,8 @@ gem 'tzinfo-data', platforms: [:mingw, :x64_mingw, :mswin]
gem 'rotp', '>= 5.0.0'
gem 'rqrcode'
# HTML pipeline and sanitization
gem "html-pipeline", "~> 2.13.2"
gem "sanitize", "~> 6.0"
# HTML sanitization
gem "sanitize", "~> 7.0"
# Triggering of Webhooks
gem "rest-client", "~> 2.1"

View File

@@ -30,8 +30,8 @@ module Redmine
'important' => 'message-report',
}.freeze
class AlertsIconsFilter < HTML::Pipeline::Filter
def call
class AlertsIconsScrubber < Loofah::Scrubber
def scrub(doc)
doc.search("p.markdown-alert-title").each do |node|
parent_node = node.parent
parent_class_attr = parent_node['class'] # e.g., "markdown-alert markdown-alert-note"

View File

@@ -24,19 +24,19 @@ module Redmine
module CommonMark
# adds class="external" to external links, and class="email" to mailto
# links
class ExternalLinksFilter < HTML::Pipeline::Filter
def call
doc.search("a").each do |node|
class ExternalLinksScrubber < Loofah::Scrubber
def scrub(node)
if node.name == 'a'
url = node["href"]
next unless url
next if url.starts_with?("/") || url.starts_with?("#") || !url.include?(':')
return unless url
return if url.starts_with?("/") || url.starts_with?("#") || !url.include?(':')
scheme = begin
URI.parse(url).scheme
rescue
nil
end
next if scheme.blank?
return if scheme.blank?
klass = node["class"].presence
node["class"] = [
@@ -50,7 +50,6 @@ module Redmine
node["rel"] = rel.join(" ")
end
end
doc
end
end
end

View File

@@ -26,15 +26,13 @@ module Redmine
# @<a href="mailto:user@example.org">user@example.org</a>
# - autolinked hi res image names that look like email addresses:
# <a href="mailto:printscreen@2x.png">printscreen@2x.png</a>
class FixupAutoLinksFilter < HTML::Pipeline::Filter
class FixupAutoLinksScrubber < Loofah::Scrubber
USER_LINK_PREFIX = /(@|user:)\z/
HIRES_IMAGE = /.+@\dx\.(bmp|gif|jpg|jpe|jpeg|png)\z/
def call
doc.search("a").each do |node|
unless (url = node['href']) && url.starts_with?('mailto:')
next
end
def scrub(node)
if node.name == 'a'
return unless (url = node['href']) && url.starts_with?('mailto:')
if ((p = node.previous) && p.text? &&
p.text =~(USER_LINK_PREFIX)) ||
@@ -43,7 +41,6 @@ module Redmine
node.replace node.text
end
end
doc
end
end
end

View File

@@ -17,8 +17,6 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
require 'html/pipeline'
module Redmine
module WikiFormatting
module CommonMark
@@ -54,14 +52,13 @@ module Redmine
}.freeze,
}.freeze
MarkdownPipeline = HTML::Pipeline.new [
MarkdownFilter,
SanitizationFilter,
SyntaxHighlightFilter,
FixupAutoLinksFilter,
ExternalLinksFilter,
AlertsIconsFilter
], PIPELINE_CONFIG
SANITIZER = SanitizationFilter.new
SCRUBBERS = [
SyntaxHighlightScrubber.new,
FixupAutoLinksScrubber.new,
ExternalLinksScrubber.new,
AlertsIconsScrubber.new
]
class Formatter
include Redmine::WikiFormatting::SectionHelper
@@ -71,8 +68,13 @@ module Redmine
end
def to_html(*args)
result = MarkdownPipeline.call @text
result[:output].to_s
html = MarkdownFilter.new(@text, PIPELINE_CONFIG).call
fragment = Redmine::WikiFormatting::HtmlParser.parse(html)
SANITIZER.call(fragment)
SCRUBBERS.each do |scrubber|
fragment.scrub!(scrubber)
end
fragment.to_s
end
end
end

View File

@@ -25,10 +25,12 @@ module Redmine
# We do not use the stock HTML::Pipeline::MarkdownFilter because this
# does not allow for straightforward configuration of render and parsing
# options
class MarkdownFilter < HTML::Pipeline::TextFilter
def initialize(text, context = nil, result = nil)
super
@text = @text.delete "\r"
class MarkdownFilter
attr_reader :context
def initialize(text, context = nil)
@text = text.delete "\r"
@context = context
end
def call

View File

@@ -21,44 +21,132 @@ module Redmine
module WikiFormatting
module CommonMark
# sanitizes rendered HTML using the Sanitize gem
class SanitizationFilter < HTML::Pipeline::SanitizationFilter
class SanitizationFilter
include Redmine::Helpers::URL
attr_accessor :allowlist
LISTS = Set.new(%w[ul ol].freeze)
LIST_ITEM = 'li'
# List of table child elements. These must be contained by a <table> element
# or they are not allowed through. Otherwise they can be used to break out
# of places we're using tables to contain formatted user content (like pull
# request review comments).
TABLE_ITEMS = Set.new(%w[tr td th].freeze)
TABLE = 'table'
TABLE_SECTIONS = Set.new(%w[thead tbody tfoot].freeze)
# The main sanitization allowlist. Only these elements and attributes are
# allowed through by default.
ALLOWLIST = {
:elements => %w[
h1 h2 h3 h4 h5 h6 br b i strong em a pre code img input tt u
div ins del sup sub p ol ul table thead tbody tfoot blockquote
dl dt dd kbd q samp var hr ruby rt rp li tr td th s strike summary
details caption figure figcaption
abbr bdo cite dfn mark small span time wbr
].freeze,
:remove_contents => ['script'].freeze,
:attributes => {
'a' => %w[href id name].freeze,
'img' => %w[src longdesc].freeze,
'code' => ['class'].freeze,
'div' => %w[class itemscope itemtype].freeze,
'li' => %w[id class].freeze,
'input' => %w[class type].freeze,
'p' => ['class'].freeze,
'ul' => ['class'].freeze,
'blockquote' => ['cite'].freeze,
'del' => ['cite'].freeze,
'ins' => ['cite'].freeze,
'q' => ['cite'].freeze,
:all => %w[
abbr accept accept-charset
accesskey action align alt
aria-describedby aria-hidden aria-label aria-labelledby
axis border cellpadding cellspacing char
charoff charset checked
clear cols colspan color
compact coords datetime dir
disabled enctype for frame
headers height hreflang
hspace ismap label lang
maxlength media method
multiple nohref noshade
nowrap open progress prompt readonly rel rev
role rows rowspan rules scope
selected shape size span
start style summary tabindex target
title type usemap valign value
vspace width itemprop
].freeze
}.freeze,
:protocols => {
'blockquote' => { 'cite' => ['http', 'https', :relative].freeze },
'del' => { 'cite' => ['http', 'https', :relative].freeze },
'ins' => { 'cite' => ['http', 'https', :relative].freeze },
'q' => { 'cite' => ['http', 'https', :relative].freeze },
'img' => {
'src' => ['http', 'https', :relative].freeze,
'longdesc' => ['http', 'https', :relative].freeze
}.freeze
},
:transformers => [
# Top-level <li> elements are removed because they can break out of
# containing markup.
lambda { |env|
name = env[:node_name]
node = env[:node]
if name == LIST_ITEM && node.ancestors.none? { |n| LISTS.include?(n.name) }
node.replace(node.children)
end
},
# Table child elements that are not contained by a <table> are removed.
lambda { |env|
name = env[:node_name]
node = env[:node]
if (TABLE_SECTIONS.include?(name) || TABLE_ITEMS.include?(name)) && node.ancestors.none? { |n| n.name == TABLE }
node.replace(node.children)
end
}
].freeze,
:css => {
:properties => %w[
color background-color
width min-width max-width
height min-height max-height
padding padding-left padding-right padding-top padding-bottom
margin margin-left margin-right margin-top margin-bottom
border border-left border-right border-top border-bottom border-radius border-style border-collapse border-spacing
font font-style font-variant font-weight font-stretch font-size line-height font-family
text-align
float
].freeze
}
}.freeze
RELAXED_PROTOCOL_ATTRS = {
"a" => %w(href).freeze,
}.freeze
ALLOWED_CSS_PROPERTIES = %w[
color background-color
width min-width max-width
height min-height max-height
padding padding-left padding-right padding-top padding-bottom
margin margin-left margin-right margin-top margin-bottom
border border-left border-right border-top border-bottom border-radius border-style border-collapse border-spacing
font font-style font-variant font-weight font-stretch font-size line-height font-family
text-align
float
].freeze
def initialize
@allowlist = default_allowlist
add_transformers
end
def allowlist
@allowlist ||= customize_allowlist(super.deep_dup)
def call(doc)
# Sanitize is applied to the whole document, so the API is different from loofeh's scrubber.
Sanitize.clean_node!(doc, allowlist)
end
private
# customizes the allowlist defined in
# https://github.com/jch/html-pipeline/blob/master/lib/html/pipeline/sanitization_filter.rb
def customize_allowlist(allowlist)
# Disallow `name` attribute globally, allow on `a`
allowlist[:attributes][:all].delete("name")
allowlist[:attributes]["a"].push("name")
allowlist[:attributes][:all].push("style")
allowlist[:css] = { properties: ALLOWED_CSS_PROPERTIES }
def add_transformers
# allow class on code tags (this holds the language info from fenced
# code bocks and has the format language-foo)
allowlist[:attributes]["code"] = %w(class)
allowlist[:transformers].push lambda{|env|
allowlist[:transformers].push lambda {|env|
node = env[:node]
return unless node.name == "code"
return unless node.has_attribute?("class")
@@ -70,9 +158,7 @@ module Redmine
# Allow class on div and p tags only for alert blocks
# (must be exactly: "markdown-alert markdown-alert-*" for div, and "markdown-alert-title" for p)
(allowlist[:attributes]["div"] ||= []) << "class"
(allowlist[:attributes]["p"] ||= []) << "class"
allowlist[:transformers].push lambda{|env|
allowlist[:transformers].push lambda {|env|
node = env[:node]
return unless node.element?
@@ -98,10 +184,8 @@ module Redmine
# allowlist[:attributes]["td"] = %w(style)
# allowlist[:css] = { properties: ["text-align"] }
# Allow `id` in a elements for footnotes
allowlist[:attributes]["a"].push "id"
# Remove any `id` property not matching for footnotes
allowlist[:transformers].push lambda{|env|
allowlist[:transformers].push lambda {|env|
node = env[:node]
return unless node.name == "a"
return unless node.has_attribute?("id")
@@ -112,8 +196,7 @@ module Redmine
# allow `id` in li element for footnotes
# allow `class` in li element for task list items
allowlist[:attributes]["li"] = %w(id class)
allowlist[:transformers].push lambda{|env|
allowlist[:transformers].push lambda {|env|
node = env[:node]
return unless node.name == "li"
@@ -128,11 +211,8 @@ module Redmine
# allow input type = "checkbox" with class "task-list-item-checkbox"
# for task list items
allowlist[:elements].push('input')
allowlist[:attributes]["input"] = %w(class type)
allowlist[:transformers].push lambda{|env|
allowlist[:transformers].push lambda {|env|
node = env[:node]
return unless node.name == "input"
return if node['type'] == "checkbox" && node['class'] == "task-list-item-checkbox"
@@ -140,10 +220,8 @@ module Redmine
}
# allow class "contains-task-list" on ul for task list items
allowlist[:attributes]["ul"] = %w(class)
allowlist[:transformers].push lambda{|env|
allowlist[:transformers].push lambda {|env|
node = env[:node]
return unless node.name == "ul"
return if node["class"] == "contains-task-list"
@@ -151,8 +229,7 @@ module Redmine
}
# https://github.com/rgrove/sanitize/issues/209
allowlist[:protocols].delete("a")
allowlist[:transformers].push lambda{|env|
allowlist[:transformers].push lambda {|env|
node = env[:node]
return if node.type != Nokogiri::XML::Node::ELEMENT_NODE
@@ -168,11 +245,12 @@ module Redmine
end
end
}
end
# Allow `u` element to enable underline
allowlist[:elements].push('u')
allowlist
# The allowlist to use when sanitizing. This can be passed in the context
# hash to the filter but defaults to ALLOWLIST constant value above.
def default_allowlist
ALLOWLIST.deep_dup
end
end
end

View File

@@ -22,11 +22,11 @@ module Redmine
module CommonMark
# Redmine Syntax highlighting for <pre><code class="language-foo">
# blocks as generated by commonmarker
class SyntaxHighlightFilter < HTML::Pipeline::Filter
def call
doc.search("pre > code").each do |node|
next unless lang = node["class"].presence
next unless lang =~ /\Alanguage-(\S+)\z/
class SyntaxHighlightScrubber < Loofah::Scrubber
def scrub(node)
if node.matches?("pre > code")
return unless lang = node["class"].presence
return unless lang =~ /\Alanguage-(\S+)\z/
lang = $1
text = node.inner_text
@@ -36,7 +36,7 @@ module Redmine
if Redmine::SyntaxHighlighting.language_supported?(lang)
html = Redmine::SyntaxHighlighting.highlight_by_language(text, lang)
next if html.nil?
return if html.nil?
node.inner_html = html
node["class"] = "#{lang} syntaxhl"
@@ -45,7 +45,6 @@ module Redmine
node.remove_attribute("class")
end
end
doc
end
end
end

View File

@@ -28,6 +28,10 @@ module Redmine
'style' => ''
}
def self.parse(html)
Loofah.html5_fragment(html)
end
def self.to_text(html)
html = html.gsub(/[\n\r]/, ' ')

View File

@@ -19,17 +19,18 @@
module Redmine
module WikiFormatting
# Combination of SanitizationFilter and ExternalLinksFilter
# Combination of SanitizationFilter and ExternalLinksScrubber
class HtmlSanitizer
Pipeline = HTML::Pipeline.new(
[
Redmine::WikiFormatting::CommonMark::SanitizationFilter,
Redmine::WikiFormatting::CommonMark::ExternalLinksFilter,
], {})
SANITIZER = Redmine::WikiFormatting::CommonMark::SanitizationFilter.new
SCRUBBERS = [Redmine::WikiFormatting::CommonMark::ExternalLinksScrubber.new]
def self.call(html)
result = Pipeline.call html
result[:output].to_s
fragment = HtmlParser.parse(html)
SANITIZER.call(fragment)
SCRUBBERS.each do |scrubber|
fragment.scrub!(scrubber)
end
fragment.to_s
end
end
end

View File

@@ -20,17 +20,20 @@
require_relative '../../../../../test_helper'
if Object.const_defined?(:Commonmarker)
require 'redmine/wiki_formatting/common_mark/alerts_icons_filter'
require 'redmine/wiki_formatting/common_mark/alerts_icons_scrubber'
class Redmine::WikiFormatting::CommonMark::AlertsIconsFilterTest < ActiveSupport::TestCase
include Redmine::I18n
def format(markdown)
Redmine::WikiFormatting::CommonMark::MarkdownFilter.to_html(markdown, Redmine::WikiFormatting::CommonMark::PIPELINE_CONFIG)
Redmine::WikiFormatting::CommonMark::MarkdownFilter.new(markdown, Redmine::WikiFormatting::CommonMark::PIPELINE_CONFIG).call
end
def filter(html)
Redmine::WikiFormatting::CommonMark::AlertsIconsFilter.to_html(html, @options)
fragment = Redmine::WikiFormatting::HtmlParser.parse(html)
scrubber = Redmine::WikiFormatting::CommonMark::AlertsIconsScrubber.new
fragment.scrub!(scrubber)
fragment.to_s
end
def setup

View File

@@ -20,15 +20,13 @@
require_relative '../../../../../test_helper'
if Object.const_defined?(:Commonmarker)
require 'redmine/wiki_formatting/common_mark/external_links_filter'
class Redmine::WikiFormatting::CommonMark::ExternalLinksFilterTest < ActiveSupport::TestCase
class Redmine::WikiFormatting::CommonMark::ExternalFilterTest < ActiveSupport::TestCase
def filter(html)
Redmine::WikiFormatting::CommonMark::ExternalLinksFilter.to_html(html, @options)
end
def setup
@options = { }
fragment = Redmine::WikiFormatting::HtmlParser.parse(html)
scrubber = Redmine::WikiFormatting::CommonMark::ExternalLinksScrubber.new
fragment.scrub!(scrubber)
fragment.to_s
end
def test_external_links_should_have_external_css_class

View File

@@ -20,19 +20,17 @@
require_relative '../../../../../test_helper'
if Object.const_defined?(:Commonmarker)
require 'redmine/wiki_formatting/common_mark/fixup_auto_links_filter'
class Redmine::WikiFormatting::CommonMark::FixupAutoLinksFilterTest < ActiveSupport::TestCase
class Redmine::WikiFormatting::CommonMark::FixupAutoLinksScrubberTest < ActiveSupport::TestCase
def filter(html)
Redmine::WikiFormatting::CommonMark::FixupAutoLinksFilter.to_html(html, @options)
fragment = Redmine::WikiFormatting::HtmlParser.parse(html)
scrubber = Redmine::WikiFormatting::CommonMark::FixupAutoLinksScrubber.new
fragment.scrub!(scrubber)
fragment.to_s
end
def format(markdown)
Redmine::WikiFormatting::CommonMark::MarkdownFilter.to_html(markdown, Redmine::WikiFormatting::CommonMark::PIPELINE_CONFIG)
end
def setup
@options = { }
Redmine::WikiFormatting::CommonMark::MarkdownFilter.new(markdown, Redmine::WikiFormatting::CommonMark::PIPELINE_CONFIG).call
end
def test_should_fixup_autolinked_user_references

View File

@@ -255,7 +255,7 @@ class Redmine::WikiFormatting::CommonMark::FormatterTest < ActionView::TestCase
def test_should_support_html_tables
text = '<table style="background: red"><tr><td>Cell</td></tr></table>'
assert_equal '<table><tr><td>Cell</td></tr></table>', to_html(text)
assert_equal '<table><tbody><tr><td>Cell</td></tr></tbody></table>', to_html(text)
end
def test_should_remove_unsafe_uris
@@ -289,10 +289,10 @@ class Redmine::WikiFormatting::CommonMark::FormatterTest < ActionView::TestCase
<p>Task list:</p>
<ul class="contains-task-list">
<li class="task-list-item">
<input type="checkbox" class="task-list-item-checkbox" disabled> Task 1
<input type="checkbox" class="task-list-item-checkbox" disabled=""> Task 1
</li>
<li class="task-list-item">
<input type="checkbox" class="task-list-item-checkbox" checked disabled> Task 2</li>
<input type="checkbox" class="task-list-item-checkbox" checked="" disabled=""> Task 2</li>
</ul>
EXPECTED

View File

@@ -24,7 +24,10 @@ if Object.const_defined?(:Commonmarker)
class Redmine::WikiFormatting::CommonMark::MarkdownFilterTest < ActiveSupport::TestCase
def filter(markdown)
Redmine::WikiFormatting::CommonMark::MarkdownFilter.to_html(markdown)
filter = Redmine::WikiFormatting::CommonMark::MarkdownFilter.new(
markdown,
Redmine::WikiFormatting::CommonMark::PIPELINE_CONFIG)
filter.call
end
# just a basic sanity test. more formatting tests in the formatter_test

View File

@@ -20,15 +20,13 @@
require_relative '../../../../../test_helper'
if Object.const_defined?(:Commonmarker)
require 'redmine/wiki_formatting/common_mark/sanitization_filter'
class Redmine::WikiFormatting::CommonMark::SanitizationFilterTest < ActiveSupport::TestCase
def filter(html)
Redmine::WikiFormatting::CommonMark::SanitizationFilter.to_html(html, @options)
end
def setup
@options = { }
fragment = Redmine::WikiFormatting::HtmlParser.parse(html)
sanitizer = Redmine::WikiFormatting::CommonMark::SanitizationFilter.new
sanitizer.call(fragment)
fragment.to_s
end
def test_should_filter_tags
@@ -137,7 +135,7 @@ if Object.const_defined?(:Commonmarker)
],
[
'Lo<!-- comment -->rem</b> <a href=pants title="foo>ipsum <a href="http://foo.com/"><strong>dolor</a></strong> sit<br/>amet <script>alert("hello world");',
'Lorem <a href="pants" title="foo&gt;ipsum &lt;a href="><strong>dolor</strong></a> sit<br>amet '
'Lorem <a href="pants" title="foo>ipsum <a href="><strong>dolor</strong></a> sit<br>amet '
],
[
'<p>a</p><blockquote>b',
@@ -217,8 +215,7 @@ if Object.const_defined?(:Commonmarker)
'protocol-based JS injection: null char' => [
"<img src=java\0script:alert(\"XSS\")>",
'<img src="java">'
# '<img>'
'<img>'
],
'protocol-based JS injection: invalid URL char' => [
@@ -228,8 +225,7 @@ if Object.const_defined?(:Commonmarker)
'protocol-based JS injection: spaces and entities' => [
'<img src=" &#14; javascript:alert(\'XSS\');">',
'<img src="">'
# '<img>'
'<img>'
],
'protocol whitespace' => [

View File

@@ -19,15 +19,13 @@
require_relative '../../../../../test_helper'
if Object.const_defined?(:Commonmarker)
require 'redmine/wiki_formatting/common_mark/syntax_highlight_filter'
class Redmine::WikiFormatting::CommonMark::SyntaxHighlightFilterTest < ActiveSupport::TestCase
class Redmine::WikiFormatting::CommonMark::SyntaxHighlightScrubberTest < ActiveSupport::TestCase
def filter(html)
Redmine::WikiFormatting::CommonMark::SyntaxHighlightFilter.to_html(html, @options)
end
def setup
@options = { }
fragment = Redmine::WikiFormatting::HtmlParser.parse(html)
scrubber = Redmine::WikiFormatting::CommonMark::SyntaxHighlightScrubber.new
fragment.scrub!(scrubber)
fragment.to_s
end
def test_should_highlight_supported_language