topical media & game development 
  
  
    
    
  
 mashup-delicious-13-rally2007-lib-rss-parse.inc / inc
  <?php
  
  
 Project:     MagpieRSS: a simple RSS integration tool
 File:        rss_parse.inc  - parse an RSS or Atom feed
               return as a simple object.
 Handles RSS 0.9x, RSS 2.0, RSS 1.0, and Atom 0.3
 The latest version of MagpieRSS can be obtained from:
 http://magpierss.sourceforge.net
*
* For questions, help, comments, discussion, etc., please join the
 Magpie mailing list:
 magpierss-general@lists.sourceforge.net
	 author:            Kellan Elliott-McCrea 
	 version:           0.7a
 @license          GPL
  
  
  define('RSS', 'RSS');
  define('ATOM', 'Atom');
  
  require_once (MAGPIE_DIR . 'rss_utils.inc');
  
  
 Hybrid parser, and object, takes RSS as a string and returns a simple object.
 see: rss_fetch.inc for a simpler interface with integrated caching support
  
  class MagpieRSS {
      var current_item   = array();  // item currently being parsed
      var channel        = array();  // hash of channel fields
      var image          = array();
      var feed_version;
      var _source_encoding = '';     // only set if we have to parse xml prolog
      
      var WARNING = "";
      
      // define some constants
      
      var _KNOWN_ENCODINGS    = array('UTF-8', 'US-ASCII', 'ISO-8859-1');
  
      // parser variables, useless if you're not a parser, treat as private
      var inchannel          = false;
      var incontent          = false; // if in Atom <content mode="xml"> field 
      var inimage            = false;
      var 
  Set up XML parser, parse source, and return populated RSS object.
   
	 parameter:  string source           string containing the RSS to be parsed
  NOTE:  Probably a good idea to leave the encoding options alone unless
         you know what you're doing as PHP's character set support is
         a little weird.
  NOTE:  A lot of this is unnecessary but harmless with PHP5 
	 parameter:  string input_encoding   the character set of the incoming RSS source. 
                                  Leave blank and Magpie will try to figure it
                                  out.
                                  
                                   
	 parameter:  bool   
  
      function MagpieRSS (source, input_encoding=null, this->error( "Failed to load PHP's XML Extension. " . 
                            "http://www.php.net/manual/en/ref.xml.php",
                             E_USER_ERROR );
          }
          
          list(source) = source, 
                  input_encoding, parser)) {
              this->parser = this->parser, this->parser, 
                  'feed_start_element', 'feed_end_element' );
                          
          xml_set_character_data_handler( status = xml_parse( source );
          
          if (! errorcode = xml_get_error_code( errorcode != XML_ERROR_NONE ) {
                  errorcode );
                  this->parser);
                  this->parser);
                  xml_error at line error_col";
  
                  errormsg );
              }
          }
          
          xml_parser_free( this->normalize();
      }
      
      function feed_start_element(element, &el = element);
          attrs, CASE_LOWER);
          
          // check for a namespace, and split if found
          element, ':' ) ) {
              list(el) = split( ':', ns and this->current_namespace = this->feed_type) ) {
              if ( this->feed_type = RSS;
                  el == 'rss' ) {
                  this->feed_version = el == 'feed' ) {
                  this->feed_version = this->inchannel = true;
              }
              return;
          }
      
          if ( this->inchannel = true;
          }
          elseif (el == 'entry' ) 
          {
              attrs['rdf:about']) ) {
                  attrs['rdf:about']; 
              }
          }
          
          // if we're in the default namespace of an RSS feed,
          //  record textinput or image fields
          elseif ( 
              this->current_namespace == '' and 
              this->intextinput = true;
          }
          
          elseif (
              this->current_namespace == '' and 
              this->inimage = true;
          }
          
          # handle atom content constructs
          elseif ( el, el == 'content' ) {
                  this->incontent = this->feed_type == ATOM and attrs_str = join(' ', 
                      array_map('map_attrs', 
                      array_keys(attrs) ) );
              
              element this->stack, this->feed_type == ATOM and attrs['rel']) and link_el = 'link';
              }
              else {
                  attrs['rel'];
              }
              
              link_el, this->stack, p, this->feed_type == ATOM and this->append_content( current_el = join('_', array_reverse(this->append(text);
          }
      }
      
      function feed_end_element (el) {
          el);
          
          if ( el == 'entry' ) 
          {
              this->current_item;
              this->initem = false;
          }
          elseif (this->current_namespace == '' and this->intextinput = false;
          }
          elseif (this->current_namespace == '' and this->inimage = false;
          }
          elseif (el, this->incontent = false;
          }
          elseif (el == 'feed' ) 
          {
              this->feed_type == ATOM and this->stack[0] == this->append_content("</this->append_content("<this->stack );
          }
          else {
              array_shift( this->current_namespace = false;
      }
      
      function concat (&str2="") {
          if (!isset(str1="";
          }
          str2;
      }
      
      
      
      function append_content(this->initem ) {
              this->current_item[ text );
          }
          elseif ( this->concat( this->incontent ], el, el) {
              return;
          }
          if ( this->initem ) {
                  this->current_item[ el ], this->inchannel) {
                  this->channel[ el ], this->intextinput) {
                  this->textinput[ el ], this->inimage) {
                  this->image[ el ], this->initem ) {
                  this->current_item[ text);
              }
              elseif (this->concat(
                      el ], this->inimage) {
                  this->image[ text );
              }
              elseif (this->concat(
                      el ], this->is_atom() ) {
              this->channel['tagline'];
              for ( i < count(i++) {
                  this->items[item['summary']) )
                      item['summary'];
                  if ( isset(item['content']['encoded'] = atom_date = (isset(item['issued'] : atom_date ) {
                      atom_date);
                      if (epoch > 0) {
                          epoch;
                      }
                  }
                  
                  i] = this->is_rss() ) {
              this->channel['description'];
              for ( i < count(i++) {
                  this->items[item['description']))
                      item['description'];
                  if ( isset(item['atom_content'] = this->is_rss() == '1.0' and isset(epoch = @parse_w3cdtf(epoch and item['date_timestamp'] = item['pubdate']) ) {
                      item['pubdate']);
                      if (item['date_timestamp'] = this->items[item;
              }
          }
      }
      
      
      function is_rss () {
          if ( this->feed_version; 
          }
          else {
              return false;
          }
      }
      
      function is_atom() {
          if ( this->feed_version;
          }
          else {
              return false;
          }
      }
  
      
 return XML parser, and possibly re-encoded source
  
      function create_parser(out_enc, detect) {
          if ( substr(phpversion(),0,1) == 5) {
              this->php5_create_parser(detect);
          }
          else {
              list(source) = source, detect);
          }
          if (this->encoding = parser, XML_OPTION_TARGET_ENCODING, parser, 
 Instantiate an XML parser under PHP5
 PHP5 will do a fine job of detecting input encoding
 if passed an empty string as the encoding. 
 All hail libxml2!
  
      function php5_create_parser(in_enc, detect && in_enc);
          }
          else {
              return xml_parser_create('');
          }
      }
      
      
 Instantiate an XML parser under PHP4
 Unfortunately PHP4's support for character encodings
 and especially XML and character encodings sucks.  As
 long as the documents you parse only contain characters
 from the ISO-8859-1 character set (a superset of ASCII,
 and a subset of UTF-8) you're fine.  However once you
 step out of that comfy little world things get mad, bad,
 and dangerous to know.
 The following code is based on SJM's work with FoF
	 see:  minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss
  
      function php4_create_parser(in_enc, detect ) {
              return array(xml_parser_create(source);
          }
          
          if (!source, in_enc = strtoupper(this->source_encoding = in_enc = 'UTF-8';
              }
          }
          
          if (in_enc)) {
              return array(xml_parser_create(source);
          }
          
          // the detected encoding is not one of the simple encodings PHP knows
          
          // attempt to use the iconv extension to
          // cast the XML to a known encoding
          //
	 see:  php.net/iconv
  
         
          if (function_exists('iconv'))  {
              in_enc,'UTF-8', encoded_source) {
                  return array(xml_parser_create('UTF-8'), encoded_source = mb_convert_encoding(in_enc );
              if (encoded_source);
              }
          }
          
          // else 
          in_enc) " .
                       "You may see strange artifacts, and mangled characters.",
                       E_USER_NOTICE);
              
          return array(xml_parser_create(), enc) {
          enc);
          if ( in_array(this->_KNOWN_ENCODINGS) ) {
              return errormsg, php_errormsg) ) { 
              php_errormsg)";
          }
          if ( MAGPIE_DEBUG ) {
              trigger_error( lvl);        
          }
          else {
              error_log( notices = E_USER_NOTICE|E_NOTICE;
          if ( notices ) {
              errormsg;
          } else {
              errormsg;
          }
      }
      
      
  } // end class RSS
  
  function map_attrs(v) {
      return "v\"";
  }
  
  // patch to support medieval versions of PHP4.1.x, 
  // courtesy, Ryan Currie, ryan@digibliss.com
  
  if (!function_exists('array_change_key_case')) {
          define("CASE_UPPER",1);
          define("CASE_LOWER",0);
  
          function array_change_key_case(case=CASE_LOWER) {
         if (cmd=strtolower;
         elseif (cmd=strtoupper;
         foreach(key=>output[key)]=output;
          }
  
  }
  
  ?>
  
  
  
(C) Æliens 
20/2/2008
You may not copy or print any of this material without explicit permission of the author or the publisher. 
In case of other copyright issues, contact the author.