Class Syndication::Atom::Data
In: lib/syndication/atom.rb
Parent: Syndication::Container

This object has to be handled specially; the parser feeds in all the REXML events, so the object can reconstruct embedded XML/XHTML. (Normally, the parser handles text buffering for a Container and calls store() when the container’s element is closed.)

Methods

html   html2text   new   tag_end   tag_start   text   text2html   txt   xhtml  

Constants

ENTITIES = { 'Aacute' => 193, 'aacute' => 225, 'Acirc' => 194, 'acirc' => 226, 'acute' => 180, 'AElig' => 198, 'aelig' => 230, 'Agrave' => 192, 'agrave' => 224, 'amp' => 38, 'Aring' => 197, 'aring' => 229, 'Atilde' => 195, 'atilde' => 227, 'Auml' => 196, 'auml' => 228, 'brvbar' => 166, 'Ccedil' => 199, 'ccedil' => 231, 'cedil' => 184, 'cent' => 162, 'copy' => 169, 'curren' => 164, 'deg' => 176, 'divide' => 247, 'Eacute' => 201, 'eacute' => 233, 'Ecirc' => 202, 'ecirc' => 234, 'Egrave' => 200, 'egrave' => 232, 'ETH' => 208, 'eth' => 240, 'Euml' => 203, 'euml' => 235, 'frac12' => 189, 'frac14' => 188, 'frac34' => 190, 'gt' => 62, 'Iacute' => 205, 'iacute' => 237, 'Icirc' => 206, 'icirc' => 238, 'iexcl' => 161, 'Igrave' => 204, 'igrave' => 236, 'iquest' => 191, 'Iuml' => 207, 'iuml' => 239, 'laquo' => 171, 'lt' => 60, 'macr' => 175, 'micro' => 181, 'middot' => 183, 'nbsp' => 160, 'not' => 172, 'Ntilde' => 209, 'ntilde' => 241, 'Oacute' => 211, 'oacute' => 243, 'Ocirc' => 212, 'ocirc' => 244, 'Ograve' => 210, 'ograve' => 242, 'ordf' => 170, 'ordm' => 186, 'Oslash' => 216, 'oslash' => 248, 'Otilde' => 213, 'otilde' => 245, 'Ouml' => 214, 'ouml' => 246, 'para' => 182, 'plusmn' => 177, 'pound' => 163, 'quot' => 34, 'raquo' => 187, 'reg' => 174, 'sect' => 167, 'shy' => 173, 'sup1' => 185, 'sup2' => 178, 'sup3' => 179, 'szlig' => 223, 'THORN' => 222, 'thorn' => 254, 'times' => 215, 'Uacute' => 218, 'uacute' => 250, 'Ucirc' => 219, 'ucirc' => 251, 'Ugrave' => 217, 'ugrave' => 249, 'uml' => 168, 'Uuml' => 220, 'uuml' => 252, 'Yacute' => 221, 'yacute' => 253, 'yen' => 165, 'yuml' => 255 }   Table of entities ripped from the XHTML spec.

Attributes

data  [R]  The decoded data, if the type is not text or XML

Public Class methods

[Source]

# File lib/syndication/atom.rb, line 89
    # Set up a Data container for the element named +tag+.
    #
    # parent:: stored as-is; tag_end hands it back once this element closes
    # tag::    name of the element whose content is being collected
    # attrs::  attributes of the element, indexed by name, or nil when none
    #          were supplied; its 'type' entry (if any) selects the buffer
    def initialize(parent, tag, attrs = nil)
      @tag = tag
      @parent = parent
      # attrs may legitimately be nil (it is the declared default), so guard
      # the lookup. 'text' is the default type, as per the standard.
      @type = (attrs && attrs['type']) || 'text'
      # NOTE(review): no method visible here reads @div_trimmed (tag_end
      # tracks @div_stripped instead); kept in case other code relies on it.
      @div_trimmed = false
      # Only the buffer matching the declared type is created; for any other
      # type no buffer exists and the accessors return nil.
      case @type
      when 'xhtml' then @xhtml = ''
      when 'html'  then @html = ''
      when 'text'  then @text = ''
      end
    end

Public Instance methods

Return value of Data as HTML.

[Source]

# File lib/syndication/atom.rb, line 127
    # Value of this Data as HTML.
    #
    # Preference order: the HTML buffer, then the XHTML buffer, then the
    # plain-text buffer escaped through text2html. Returns nil when none of
    # the three buffers exists.
    def html
      @html || @xhtml || (text2html(@text) if @text)
    end

Convert an HTML representation to text. This is done by throwing away all tags and converting all entities. Not ideal, but I can’t think of a better simple approach.

[Source]

# File lib/syndication/atom.rb, line 118
    # Convert an HTML representation to text by discarding every tag and
    # replacing named character entities with the character they stand for.
    #
    # html:: String of (X)HTML markup
    #
    # Returns a new String. Entities whose name is not in ENTITIES are
    # removed. Numeric references (&#NNN;) do not match the entity pattern
    # and are left untouched.
    def html2text(html)
      without_tags = html.gsub(/<[^>]*>/, '')
      without_tags.gsub(/&(\w+);/) do
        # Look up the captured entity *name*; the full match still carries
        # the surrounding '&' and ';' and would never be found in the table.
        codepoint = ENTITIES[Regexp.last_match(1)]
        # The table holds code points, so turn the hit into its character.
        codepoint ? [codepoint].pack('U') : ''
      end
    end

Catch tag end events if we’re collecting embedded XHTML.

[Source]

# File lib/syndication/atom.rb, line 164
    # Catch tag end events if we're collecting embedded XHTML.
    #
    # endtag::  name of the element being closed
    # current:: not used by this method itself (still forwarded by +super+)
    #
    # When +endtag+ closes this container's own element, the wrapper <div>
    # is stripped from collected XHTML (once) and the parent is returned.
    # Otherwise, for XHTML the closing tag is appended to the buffer (with
    # any "xhtml:" prefix removed) and self is returned; for every other
    # type the event is passed to the superclass.
    def tag_end(endtag, current)
      if @tag == endtag
        if @type == 'xhtml' && !defined?(@div_stripped)
          # Anchor to the very start/end of the buffer. Line anchors (^ / $)
          # would let the second substitution remove an inner </div> that
          # merely ends a line, leaving the wrapper's </div> behind.
          @xhtml.sub!(/\A\s*<div>\s*/, '')
          @xhtml.sub!(/\s*<\/div>\s*\z/, '')
          @div_stripped = true
        end
        return @parent
      end
      return super unless @type == 'xhtml'

      t = endtag.sub(/^xhtml:/, '')
      @xhtml += "</#{t}>"
      self
    end

Catch tag start events if we’re collecting embedded XHTML.

[Source]

# File lib/syndication/atom.rb, line 154
    # Catch tag start events if we're collecting embedded XHTML.
    #
    # tag::   name of the element being opened
    # attrs:: attributes of the element; not used by this method itself
    #
    # For XHTML content the opening tag is appended to the buffer with any
    # leading "xhtml:" prefix removed. Attributes are not written out, so
    # the collected markup contains bare tags only. Every other content
    # type is delegated to the superclass.
    def tag_start(tag, attrs = nil)
      return super unless @type == 'xhtml'

      bare_name = tag.sub(/^xhtml:/,'')
      @xhtml += "<#{bare_name}>"
    end

Store/buffer text in the appropriate internal field.

[Source]

# File lib/syndication/atom.rb, line 183
    # Store/buffer text in the internal field that matches the content type.
    #
    # s:: String to append
    #
    # Returns the updated buffer, or nil when the type is none of 'xhtml',
    # 'html' or 'text' (in which case +s+ is dropped).
    def text(s)
      buffer_name = { 'xhtml' => :@xhtml, 'html' => :@html, 'text' => :@text }[@type]
      return nil unless buffer_name

      instance_variable_set(buffer_name, instance_variable_get(buffer_name) + s)
    end

Convert a text representation to HTML.

[Source]

# File lib/syndication/atom.rb, line 108
    # Convert a text representation to HTML by escaping the three markup
    # characters: '&' (escaped first, so the entities produced for '<' and
    # '>' are not escaped again), then '<' and '>'. Quote characters are
    # left as they are.
    #
    # text:: plain-text String; it is not modified
    #
    # Returns a new, escaped String.
    def text2html(text)
      text.gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;')
    end

Return value of Data as ASCII text. If the field started off as (X)HTML, this is done by ruthlessly discarding markup and entities, so it is highly recommended that you use the XHTML or HTML and convert to text in a more intelligent way.

[Source]

# File lib/syndication/atom.rb, line 138
    # Value of this Data as plain text.
    #
    # The text buffer is returned directly when it exists. Otherwise the
    # XHTML buffer, or failing that the HTML buffer, is run through
    # html2text. Returns nil when none of the three buffers exists.
    def txt
      return @text if @text

      markup = @xhtml || @html
      markup ? html2text(markup) : nil
    end

Return value of Data as XHTML.

[Source]

# File lib/syndication/atom.rb, line 146
    # Value of this Data as XHTML.
    #
    # Preference order: the XHTML buffer, then the HTML buffer (returned
    # as-is), then the plain-text buffer escaped through text2html. Returns
    # nil when none of the three buffers exists.
    def xhtml
      markup = @xhtml || @html
      return markup if markup

      @text ? text2html(@text) : nil
    end

[Validate]