一部のHTML要素のみ通すフィルタ
ちがうちがうおれはなにをやっているんだ……
require "strscan"
# HTMLFilter
#
# Whitelist-based HTML filter. Only the elements and attributes named in the
# whitelist are kept as markup; everything else is escaped and emitted as
# text. Missing end tags are supplied so the output is always balanced.
#
# NOTE(review): attribute *values* are escaped but not validated, so an
# allowed "href" may still carry a "javascript:" URL. Validate URL schemes
# separately if the input is untrusted.
class HTMLFilter
  # Characters escaped in text nodes. "&" is deliberately left alone so that
  # entities already present in the input are not double-escaped.
  ESCAPE = { '<' => '&lt;', '>' => '&gt;', '"' => '&quot;' }.freeze
  ESCAPE_PATTERN = Regexp.union(ESCAPE.keys).freeze

  # Attribute values are written inside single quotes, so "'" has to be
  # escaped as well or a value could terminate the quote and inject attributes.
  ATTRIBUTE_ESCAPE = ESCAPE.merge("'" => '&#39;').freeze
  ATTRIBUTE_ESCAPE_PATTERN = Regexp.union(ATTRIBUTE_ESCAPE.keys).freeze

  # Elements that never have content; always emitted as "<name />".
  EMPTY_ELEMENTS = ["br", "hr", "img"].freeze

  # allow    - Hash mapping an allowed element name to the Array of attribute
  #            names allowed on it, e.g. { "a" => ["href"], "p" => [] }.
  # callback - callable applied to every (already escaped) text run; its
  #            return value is appended to the output. Defaults to identity.
  def initialize(allow, callback=proc {|x| x})
    @allow = allow
    @callback = callback
  end

  # Filters +input+ and returns the sanitized HTML as a new String.
  def filter(input)
    ret = "".dup
    pool = "".dup # text collected since the last recognised tag
    s = StringScanner.new(input)
    allowed = normalized_allow
    tag = tag_pattern(allowed.keys)
    elements = [] # stack of currently open element names
    until s.eos?
      if s.scan(tag)
        ret << @callback[escape(pool)]
        pool = "".dup
        # Tags are matched case-insensitively; normalise for the lookups.
        name = s[2].downcase
        if s[1]
          close_element(s, name, elements, ret)
        else
          open_element(s, name, allowed[name], elements, ret)
        end
      else
        pool << s.getch
      end
    end
    ret << @callback[escape(pool)]
    # Close whatever is still open, innermost first.
    ret << "</#{elements.pop}>" until elements.empty?
    ret
  end

  # Escapes "<", ">" and '"' in +str+ for use as HTML text.
  def escape(str)
    str.gsub(ESCAPE_PATTERN, ESCAPE)
  end

  private

  # Whitelist with lower-cased element and attribute names.
  def normalized_allow
    @allow.each_with_object({}) do |(element, attributes), table|
      table[element.to_s.downcase] = Array(attributes).map { |a| a.to_s.downcase }
    end
  end

  # Regexp matching "<name" or "</name" for an allowed name. The lookahead
  # stops "p" from matching the start of "<pre>". An empty whitelist yields a
  # pattern that never matches.
  def tag_pattern(names)
    return /(?!)/ if names.empty?
    alternatives = names.map { |n| Regexp.escape(n) }.join("|")
    %r{<(/)?(#{alternatives})(?![a-z0-9_:-])}i
  end

  # Handles an end tag: closes every element up to and including +name+.
  # An end tag for an element that is not open is dropped without touching
  # the stack.
  def close_element(s, name, elements, ret)
    s.scan(/\s*>/)
    return unless elements.include?(name)
    while (opened_but_close = elements.pop)
      ret << "</#{opened_but_close}>"
      break if name == opened_but_close
    end
  end

  # Handles a start tag: keeps only whitelisted, quoted attributes and pushes
  # the element on the stack unless it is void or self-closed.
  def open_element(s, name, allowed_attributes, elements, ret)
    attrs = []
    while s.scan(%r{\s+([a-z-]+)=(?:"([^"]*)"|'([^']*)')}i)
      an = s[1].downcase
      av = s[2] || s[3]
      attrs << "#{an}='#{escape_attribute(av)}'" if allowed_attributes.include?(an)
    end
    attrs = attrs.empty? ? "" : " #{attrs.join(" ")}"
    # When this fails the tag is malformed; the start tag is still emitted
    # and the unparsed remainder falls through to the text pool.
    terminated = s.scan(%r{\s*(/)?>})
    if EMPTY_ELEMENTS.include?(name)
      ret << "<#{name}#{attrs} />"
    elsif terminated && s[1]
      ret << "<#{name}#{attrs}></#{name}>"
    else
      ret << "<#{name}#{attrs}>"
      elements.push(name)
    end
  end

  # Escapes +str+ for use inside a single-quoted attribute value.
  def escape_attribute(str)
    str.gsub(ATTRIBUTE_ESCAPE_PATTERN, ATTRIBUTE_ESCAPE)
  end
end
# Demo driver: filter the text after __END__ with a small whitelist and turn
# every newline in the surviving text into a <br /> followed by the newline.
opts = {
  "a" => ["href", "name"],
  "strong" => [],
  "br" => [],
  "p" => [],
  "ins" => ["datetime"],
  "del" => ["datetime"],
}
inputs = DATA.read
# The callback only ever sees escaped text runs, never the emitted tags.
out = HTMLFilter.new(opts, proc {|str|
  str.gsub(/\n/, "<br />\n")
}).filter(inputs)
puts out
__END__
<script foo="<script>alert('bar')</script>">alert('foo')</script>
<script foo="<a href='link'>link</a>">alert('foo')</script>
<a href='www.g>oogle.com'>link</a>
<a href="hoge" name="aaa">hoge<strong style="">ttt</strong></a>
<a href="hoge" name="aa
{a">hoge<br><br/></a>
<ins datetime="">
<p>
aaa
</p>
<p>aaa
</ins>
<a></strong>
<p>aaa
どう書く?org のお題 をやっていたんだった。なんかだんだんズレてきたので投稿がためらわれる。改行を br にしろというお題 に対応ずみ。