class Hermes::Parser
Parses an HTML file or string.
Constants
- RE_ATTR
- RE_BANG
- RE_CDATA
- RE_CMD
- RE_COMMENT
- RE_INSTR
- RE_TAG
- Tok
Attributes
list[R]
Public Class Methods
new(str, term = nil)
click to toggle source
# File lib/hermes/tags.rb, line 42
#
# Tokenizes +str+ into the token list (@list).
#
# str::  markup to parse. When +term+ is given, +str+ is overwritten in
#        place with whatever follows the matching closing tag, so the
#        caller can continue parsing from there.
# term:: name of the enclosing tag whose closing tag ends this parser,
#        or nil for a top-level parse.
#
# Raises Error when the text following a "<" matches none of the
# recognized patterns.
#
# NOTE: the statements below rely on Ruby's frame-local match variables
# ($`, $', $1..$3). Their order matters; do not reorder.
def initialize(str, term = nil)
  @list = []
  rest = str
  while rest =~ /</
    # Text in front of the "<" is plain data.
    add_data $`
    rest = $'
    tok =
      case rest
      when %r{\A/\s*#{term}\s*>}im
        # Closing tag of the enclosing element: stop here.
        nil
      when RE_TAG
        rest = $'
        # sub_parser may consume the element's content from +rest+ in place.
        node = Tok[:tag, $1.downcase, attrs($2), sub_parser(rest, $1, $3)]
        # Empty match at the start: makes $' equal to the whole of +rest+,
        # so the shared "rest = $'" below picks up what the sub-parser left.
        rest =~ %r{\A}
        node
      when RE_INSTR
        Tok[:instr, $1.downcase, attrs($2), nil]
      when RE_COMMENT
        Tok[:comm, nil, nil, $1]
      when RE_CDATA
        # CDATA content is stored like plain data (type nil).
        Tok[nil, nil, nil, $1]
      when RE_BANG
        Tok[:bang, $1, attrl($2), nil]
      when RE_CMD
        Tok[:cmd, $1, nil, nil]
      else
        raise Error, "Unclosed standalone tag <#{term}>."
      end
    # Continue after whatever the successful match consumed.
    rest = $'
    break unless tok
    add_tok tok
  end
  if term
    # Hand the unparsed remainder back to the caller.
    str.replace rest
  else
    add_data rest
  end
end
Public Instance Methods
find_encoding()
click to toggle source
# File lib/hermes/tags.rb, line 76
#
# Searches this parser's token list for an encoding declaration.
# Returns whatever find_enc yields for @list: the first value found,
# or nil when there is none.
def find_encoding
  find_enc(@list)
end
pretty_print()
click to toggle source
# File lib/hermes/tags.rb, line 80
#
# Prints the parsed token tree to standard output, starting at
# indentation level 0.
def pretty_print
  puts_tree(@list, 0)
end
Private Instance Methods
add_data(str)
click to toggle source
# File lib/hermes/tags.rb, line 90
#
# Appends +str+ as a plain-data token (type nil) unless +str+ is empty
# according to String#notempty? (defined outside this file section).
def add_data(str)
  add_tok Tok[nil, nil, nil, str] if str.notempty?
end
add_tok(tok)
click to toggle source
# File lib/hermes/tags.rb, line 96
#
# Adds +tok+ to the token list. Adjacent plain-data tokens are merged:
# when both +tok+ and the current last token have no type, +tok+'s data
# is appended (in place) to the last token's data instead of pushing a
# new entry.
def add_tok(tok)
  # Only an untyped token is a candidate for merging with its predecessor.
  prev = tok.type ? nil : @list.last
  if prev && !prev.type
    prev.data << tok.data
  else
    @list.push tok
  end
end
attr_val(str)
click to toggle source
# File lib/hermes/tags.rb, line 129
#
# Consumes one attribute value from the front of +str+ and returns it.
#
# Recognized forms, tried in this order:
# * a double-quoted value (quotes removed, may span lines),
# * a single-quoted value (quotes removed, may span lines),
# * a bare run of non-whitespace characters.
#
# +str+ is modified in place: the consumed value and any whitespace
# around it are removed, leaving the rest of the attribute text.
#
# Returns an empty string when +str+ holds no value (empty or only
# whitespace), e.g. for <tt>name=</tt> at the end of a tag.
def attr_val(str)
  # Without this, a leading blank or an empty string matches none of the
  # patterns below, $' is nil, and String#replace raises TypeError.
  str.lstrip!
  return "" if str.empty?

  value =
    case str
    when /\A"(.*?)"/m then $1
    when /\A'(.*?)'/m then $1
    when /\A\S+/      then $&   # always matches here: str starts with a non-blank
    end
  # Keep only what follows the matched value.
  str.replace $'
  str.lstrip!
  value
end
attrl(str)
click to toggle source
# File lib/hermes/tags.rb, line 120
#
# Splits +str+ into a list of values by calling attr_val repeatedly until
# +str+ is used up (per String#notempty?). +str+ is consumed in place.
# Returns the values as an Array, in order of appearance.
def attrl(str)
  values = []
  values.push(attr_val(str)) while str.notempty?
  values
end
attrs(str)
click to toggle source
# File lib/hermes/tags.rb, line 104
#
# Parses the attribute text +str+ into a Hash, consuming +str+ in place.
#
# Each round removes one RE_ATTR match from +str+. Its first capture,
# downcased, becomes the key. If the second capture is set, the value is
# read with attr_val; otherwise the attribute maps to its own name.
# (Presumably the second capture marks a following "=" -- RE_ATTR is
# defined outside this section; confirm there.)
#
# Raises Error when the remaining text does not match RE_ATTR.
def attrs(str)
  result = {}
  while str.notempty?
    unless str.slice!(RE_ATTR)
      raise Error, "Illegal attribute specification: #{str}"
    end
    # $1/$2 come from the slice! above; read them before any other match.
    key = $1.downcase
    if $2
      result[key] = attr_val(str)
    else
      str.lstrip!
      result[key] = key
    end
  end
  result
end
find_enc(p)
click to toggle source
# File lib/hermes/tags.rb, line 140
#
# Walks the token list +p+ and returns the first encoding declaration
# found, or nil.
#
# Looked at, in document order:
# * <tt>html</tt> and <tt>head</tt> elements: searched recursively
#   (skipped when they have no nested parser, i.e. data is nil);
# * <tt>meta</tt> elements: the +charset+ attribute, or the charset taken
#   from +content+ when +http-equiv+ is "Content-Type" (compared
#   case-insensitively);
# * instruction tokens: the +encoding+ attribute. The parser types these
#   as :instr; :query is still accepted in case tokens of that type are
#   built elsewhere.
def find_enc(p)
  p.each do |e|
    found =
      case e.type
      when :tag
        case e.tag
        when "html", "head"
          # Self-closed elements carry no sub-parser.
          find_enc(e.data.list) if e.data
        when "meta"
          e.attrs["charset"] || begin
            equiv   = e.attrs["http-equiv"]
            content = e.attrs["content"]
            if equiv and content and equiv.casecmp("Content-Type") == 0
              # Loaded lazily: only needed for this legacy meta form.
              require "hermes/contents"
              c = Contents.parse content
              c["charset"]
            end
          end
        end
      when :instr, :query
        e.attrs["encoding"]
      end
    return found if found
  end
  nil
end
puts_tree(p, indent)
click to toggle source
# File lib/hermes/tags.rb, line 164
#
# Prints the token list +p+ to standard output, one entry per token,
# indented by +indent+ levels.
#
# Every token starts with "[type] tag ". Tag tokens then recurse into
# their nested parser's list (if any) one level deeper; plain-data tokens
# (type nil) print their data in inspected form; all others just end the
# line.
def puts_tree(p, indent)
  p.each do |e|
    print "%s[%s] %s " % [" " * indent, e.type, e.tag]
    case e.type
    when :tag
      puts
      puts_tree(e.data.list, indent + 1) if e.data
    when nil
      puts "%s%s" % [" " * (indent + 1), e.data.inspect]
    else
      puts
    end
  end
end
sub_parser(s, tag, close)
click to toggle source
# File lib/hermes/tags.rb, line 86
#
# Creates a nested parser of the same class for the content of +tag+,
# reading from +s+. Returns nil without parsing when +close+ is truthy.
#
# The nested parser is constructed with +tag+ as its terminator, so its
# constructor overwrites +s+ in place with the text after the closing tag.
def sub_parser(s, tag, close)
  return nil if close

  self.class.new(s, tag)
end