class Hermes::HeaderExt

Header field contents (RFC 2047) encoding

Examples¶ ↑

HeaderExt.encode "Jörg Müller"
                              #=> "=?utf-8?Q?J=C3=B6rg_M=C3=BCller?="
HeaderExt.decode "=?UTF-8?Q?J=C3=B6rg_M=C3=BCller?="
                              #=> "Jörg Müller"

Public Class Methods

new( [ parameters] ) → con click to toggle source

Creates a HeaderExt converter.

See the encode method for an explanation of the parameters.

Examples¶ ↑

con = HeaderExt.new
con = HeaderExt.new :base64 => true, :limit => 32, :lower => true
con = HeaderExt.new :mask => /["'()]/

# File lib/hermes/escape.rb, line 551
# Creates a converter and stores the recognised options.
#
# params:: +nil+ or a Hash that may contain the keys <code>:base64</code>,
#          <code>:limit</code>, <code>:lower</code> and <code>:mask</code>.
#
# Raises ArgumentError when the hash contains any other key.
#
# The hash is copied before the known keys are removed, so the caller's
# hash is left untouched (and may even be frozen).
def initialize params = nil
  if params then
    params = params.dup
    @base64 = params.delete :base64
    @limit  = params.delete :limit
    @lower  = params.delete :lower
    @mask   = params.delete :mask
    # Whatever is left over was not a known option.
    params.empty? or
      raise ArgumentError, "invalid parameter: #{params.keys.first}."
  end
end

Public Instance Methods

decode(str) click to toggle source

# File lib/hermes/escape.rb, line 694
# Instance-level convenience: hands +str+ to the class method +decode+
# and returns whatever that returns.
def decode str
  converter = self.class
  converter.decode str
end

encode( str) → str click to toggle source

Create a header field style encoded string. The following parameters will be evaluated:

:base64    # build ?B? instead of ?Q?
:limit     # break words longer than this
:lower     # build lower case ?b? and ?q?
:mask      # a regular expression detecting characters to mask

The result will not contain any 8-bit characters. The result keeps the encoding of the input string, although that encoding no longer carries any meaning for the pure-ASCII output.

The parameter :mask does not change how masking is done; it only guarantees that words containing a matching character get masked.

Examples¶ ↑

yodel = "Holleri du dödl di, diri diri dudl dö."

con = HeaderExt.new
con.encode yodel
  #=> "Holleri du =?UTF-8?Q?d=C3=B6dl?= di, diri diri dudl =?UTF-8?Q?d=C3=B6=2E?="

yodel.encode! "iso8859-1"
con.encode yodel
  #=> "Holleri du =?ISO8859-1?Q?d=F6dl?= di, diri diri dudl =?ISO8859-1?Q?d=F6=2E?="

e = "€"
e.encode! "utf-8"      ; con.encode e      #=> "=?UTF-8?Q?=E2=82=AC?="
e.encode! "iso8859-15" ; con.encode e      #=> "=?ISO8859-15?Q?=A4?="
e.encode! "ms-ansi"    ; con.encode e      #=> "=?MS-ANSI?Q?=80?="

con = HeaderExt.new :mask => /["'()]/
con.encode "'Stop!' said Fred."
  #=> "=?UTF-8?Q?=27Stop=21=27?= said Fred."

# File lib/hermes/escape.rb, line 609
# Encodes +str+ word by word (words are runs of non-whitespace).
#
# Words for which +needs?+ is true are collected, together with the
# whitespace between directly adjacent ones, into one pending run that
# is passed through +mask+ as soon as a plain word (or the end of the
# string) follows. Plain words and their leading whitespace are copied
# verbatim. The whole thing runs inside +do_encoding+.
def encode str
  do_encoding str do
    out, pending = "", ""
    rest = str
    while (m = /\S+/.match rest) do
      gap, word = m.pre_match, m[0]
      if needs? word then
        # Whitespace between two words to encode joins the pending run;
        # otherwise it belongs to the plain output.
        (pending.notempty? || out) << gap
        pending << word
      else
        unless pending.empty? then
          out << (mask pending)
          pending.clear
        end
        out << gap << word
      end
      rest = m.post_match
    end
    # +rest+ now holds only trailing whitespace (or nothing).
    if pending.empty? then
      out << rest
    else
      pending << rest
      out << (mask pending)
    end
    out
  end
end

encode_whole( str) → str click to toggle source

Unlike encode, the whole string will be encoded as one piece.

yodel = "Holleri du dödl di, diri diri dudl dö."
HeaderExt.encode_whole yodel
  #=> "=?UTF-8?Q?Holleri_du_d=C3=B6dl_di=2C_diri_diri_dudl_d=C3=B6=2E?="

# File lib/hermes/escape.rb, line 645
# Encodes the complete string in one go: inside +do_encoding+ the whole
# of +str+ is handed to +mask+, without looking at individual words.
def encode_whole str
  do_encoding(str) { mask str }
end

lexer(str) { |type, text| ... }

Splits str into tokens and yields each one as a pair of type and text, where type is :plain, :space or :decoded.

# File lib/hermes/escape.rb, line 777
# Walks through +str+ and yields it piece by piece as (type, text):
#
# :space::   a run of whitespace
# :decoded:: the result of +unmask+ for a <code>=?charset?Q|B?text?=</code> word
# :plain::   everything else (text before a match, or the unmatched rest)
#
# Scanning continues with the text after each match until nothing is
# left.
def lexer str
  token = /(\s+)|\B=\?(\S*?)\?([QB])\?(\S*?)\?=\B/i
  rest = str
  while rest do
    m = token.match rest
    lead = m && m.pre_match
    if m && m[1] then
      yield :plain, lead unless lead.empty?
      yield :space, m[0]
    elsif m && m[2] then
      yield :plain, lead unless lead.empty?
      yield :decoded, (unmask m[2], m[3], m[4])
    else
      # Nothing recognisable left: the remainder is plain text.
      yield :plain, rest
    end
    tail = m ? m.post_match : nil
    rest = tail.notempty?
  end
end

needs? str → true or false click to toggle source

Check whether a string needs encoding.

# File lib/hermes/escape.rb, line 567
# Tells whether +str+ has to be encoded: true when it contains
# non-ASCII characters or when it matches the <code>@mask</code>
# pattern, false otherwise. Always returns a real boolean.
def needs? str
  return true unless str.ascii_only?
  (str =~ @mask) ? true : false
end

std() click to toggle source

The standard header content encoding has a word break limit of 64.

# File lib/hermes/escape.rb, line 702
# Returns the standard converter, built on first use with a word break
# limit of 64 and cached in <code>@std</code> afterwards.
def std
  return @std if @std
  @std = new :limit => 64
end

Private Instance Methods

base64(c) click to toggle source

# File lib/hermes/escape.rb, line 679
# Returns the Base64 representation of +c+ as a single line without
# any whitespace.
def base64 c
  # "m0" packs Base64 without inserting line feeds.
  [c].pack "m0"
end

do_encoding(str) { || ... } click to toggle source

# File lib/hermes/escape.rb, line 653
# Sets up the per-call encoder state, runs the block and tags the
# block's result with the encoding of +str+.
#
# While the block runs:
# <code>@charset</code>:: name of the encoding of +str+
# <code>@type</code>::    "B" or "Q", depending on <code>@base64</code>
# <code>@encoder</code>:: <code>:base64</code> or <code>:quopri</code>
#
# With <code>@lower</code> set, charset and type are lower-cased. All
# three variables are reset to +nil+ afterwards, even when the block
# raises.
#
# Returns the string produced by the block.
def do_encoding str
  source_encoding = str.encoding
  # Keep the charset as a String: an Encoding object cannot be
  # lower-cased.
  @charset = source_encoding.name
  @type, @encoder = @base64 ? [ "B", :base64] : [ "Q", :quopri ]
  if @lower then
    @charset = @charset.downcase
    @type    = @type.downcase
  end
  yield.force_encoding source_encoding
ensure
  @charset = @type = @encoder = nil
end

mask(str) click to toggle source

# File lib/hermes/escape.rb, line 669
# Wraps +str+ into one or more <code>=?charset?type?...?=</code> words.
#
# The string is cut into pieces of <code>@limit</code> characters (one
# single piece when no limit is set); each piece is run through the
# method named by <code>@encoder</code>, and the resulting words are
# joined with SPACE.
def mask str
  width = @limit || str.length
  words = []
  pos = 0
  until pos >= str.length do
    piece = str[ pos, width]
    words << "=?#@charset?#@type?#{send @encoder, piece}?="
    pos += width
  end
  words.join SPACE
end

quopri(c) click to toggle source

# File lib/hermes/escape.rb, line 685
# Turns +c+ into the "Q" form in place and returns it.
#
# The string is treated as raw bytes; every byte that is not an ASCII
# letter, a digit or a space becomes <code>=XX</code> (upper-case hex),
# and spaces finally become underscores. Note that the argument itself
# is modified.
def quopri c
  c.force_encoding Encoding::ASCII_8BIT
  c.gsub!(/([^ a-zA-Z0-9])/) { |byte| format "=%02X", byte.ord }
  c.tr! " ", "_"
  c
end

unmask(cs, tp, txt) click to toggle source

# File lib/hermes/escape.rb, line 796
# Decodes the payload of one encoded word.
#
# cs::  charset name, optionally followed by <code>*language</code>
#       (RFC 2231, section 5); the language suffix is cut off
# tp::  "B" (Base64) or "Q" (quoted-printable style), any case
# txt:: the encoded text
#
# Returns the decoded text tagged with the given charset. UTF-7 is
# converted by hand; charsets starting with "unknown" are tagged as
# US-ASCII. Any other name is passed to String#force_encoding, which
# raises ArgumentError for names Ruby does not know.
#
# +cs+ and, for "Q", +txt+ are modified in place.
def unmask cs, tp, txt
  case tp.upcase
    when "B" then
      txt, = txt.unpack "m*"
    when "Q" then
      txt.tr! "_", " "
      txt, = txt.unpack "M*"
  end
  # Language as in rfc2231, 5. Language tags may contain hyphens
  # ("en-US"), so these have to be accepted here as well.
  cs.slice!(/\*[\w-]+\z/)
  case cs
    when /\Autf-?7\z/i then
      # Arrgh. Ruby 1.9 doesn't seem to do that.
      txt.force_encoding Encoding::US_ASCII
      txt.gsub!(/\+([0-9a-zA-Z+\/]*)-?/) do
        if $1.empty? then
          # "+-" stands for a literal plus sign.
          "+"
        else
          # Base64 block holding big-endian 16-bit code units.
          s = ("#$1==".unpack "m*").join
          (s.unpack "S>*").map { |x| x.chr ENCODING }.join
        end
      end
    when /\Aunknown/i then
      txt.force_encoding Encoding::US_ASCII
    else
      txt.force_encoding cs
  end
  txt
end