lastRSS.php

Go to the documentation of this file.
00001 <?php
00002 /* 
00003  ======================================================================
00004  lastRSS 0.6
00005  
00006  Simple yet powerful PHP class to parse RSS files.
00007  
00008  by Vojtech Semecky, webmaster@webdot.cz
00009  
00010  Latest version, features, manual and examples:
00011         http://lastrss.webdot.cz/
00012 
00013  ----------------------------------------------------------------------
00014  TODO
00015  - Do not run iconv on the whole document, only on TITLE and DESCRIPTION (for items as well as for the channel)
00016  ----------------------------------------------------------------------
00017  LICENSE
00018 
00019  This program is free software; you can redistribute it and/or
00020  modify it under the terms of the GNU General Public License (GPL)
00021  as published by the Free Software Foundation; either version 2
00022  of the License, or (at your option) any later version.
00023 
00024  This program is distributed in the hope that it will be useful,
00025  but WITHOUT ANY WARRANTY; without even the implied warranty of
00026  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00027  GNU General Public License for more details.
00028 
00029  To read the license please visit http://www.gnu.org/copyleft/gpl.html
00030  ======================================================================
00031 */
00032 
// -------------------------------------------------------------------
// lastRSS - regular-expression based RSS reader.
//
// Usage: set the public settings below (all optional), then call
// Get($rss_url). Get() returns an associative array describing the feed
// or False when the feed could not be opened.
// -------------------------------------------------------------------
class lastRSS {
    // -------------------------------------------------------------------
    // Settings
    // -------------------------------------------------------------------
    // Tag names looked up inside <channel>, <item>, <image> and <textinput>.
    var $channeltags = array ('title', 'link', 'description', 'language', 'copyright', 'managingEditor', 'webMaster', 'pubDate', 'lastBuildDate', 'rating', 'docs');
    var $itemtags = array('title', 'link', 'description', 'author', 'category', 'comments', 'enclosure', 'guid', 'pubDate', 'source');
    var $imagetags = array('title', 'url', 'link', 'width', 'height');
    var $textinputtags = array('title', 'description', 'name', 'link');

    // Directory for cache files; an empty string disables caching.
    var $cache_dir = '';
    // Maximum age (in seconds) of a cache file that is still served.
    var $cache_time = 0;
    // Target code page for the parsed content; empty string = no conversion.
    var $cp = '';
    // Value handed to mb_substitute_character() during conversion;
    // when empty/false a plain iconv() conversion is used instead.
    var $subs_char = '';

    // -------------------------------------------------------------------
    // Parse RSS file and returns associative array.
    //
    // $rss_url - URL or path of the feed (anything fopen wrappers accept).
    // Returns the array built by Parse() with an extra key 'cached'
    // (1 = served from cache, 0 = freshly parsed), or False on failure.
    // -------------------------------------------------------------------
    function Get ($rss_url) {
        // CACHE DISABLED >> load and parse the file directly
        if ($this->cache_dir == '') {
            $result = $this->Parse($rss_url);
            if (is_array($result)) $result['cached'] = 0;
            return $result;
        }

        // CACHE ENABLED
        $cache_file = $this->cache_dir . '/rsscache_' . md5($rss_url);
        // filemtime() returns false for a missing file; such a file is never "fresh"
        $mtime = @filemtime($cache_file);
        if ($mtime !== false && (time() - $mtime) < $this->cache_time) {
            // cached file is fresh enough, return cached array
            // NOTE(review): unserialize() must only ever see files written by
            // this class - keep cache_dir unwritable for untrusted parties.
            $serialized = @file_get_contents($cache_file);
            $result = ($serialized === false) ? false : unserialize($serialized);
            // set 'cached' to 1 only if cached file is correct
            if (is_array($result)) $result['cached'] = 1;
            return $result;
        }

        // cached file is missing or too old, create new
        $result = $this->Parse($rss_url);
        $serialized = serialize($result);
        if ($f = @fopen($cache_file, 'w')) {
            fwrite($f, $serialized, strlen($serialized));
            fclose($f);
        }
        if (is_array($result)) $result['cached'] = 0;
        return $result;
    }

    // -------------------------------------------------------------------
    // Modification of preg_match(); return trimed field with index 1
    // from 'classic' preg_match() array output.
    // Returns an empty string when the pattern does not match.
    // -------------------------------------------------------------------
    function my_preg_match ($pattern, $menu) {
        if (preg_match($pattern, (string) $menu, $out) && isset($out[1])) {
            return trim($out[1]);
        }
        return '';
    }

    // -------------------------------------------------------------------
    // Replace HTML entities &something; by real characters
    // (uses the flipped HTML_ENTITIES translation table).
    // -------------------------------------------------------------------
    function unhtmlentities ($string) {
        $trans_tbl = get_html_translation_table (HTML_ENTITIES);
        $trans_tbl = array_flip ($trans_tbl);
        return strtr ($string, $trans_tbl);
    }

    // -------------------------------------------------------------------
    // Encoding conversion function.
    // Converts $string from $in_charset to $out_charset.
    // Returns the converted string, or false when iconv() fails.
    // -------------------------------------------------------------------
    function MyConvertEncoding($in_charset, $out_charset, $string) {
        // No substitute character configured: iconv() straight to $out_charset
        if (!$this->subs_char) {
            return iconv($in_charset, $out_charset, $string);
        }

        // Substitute character configured:
        // iconv() to UTF-8, then mb_convert_encoding() to $out_charset
        $utf = iconv($in_charset, 'UTF-8', $string);
        if ($utf === false) {
            // do not feed a failed conversion into mbstring
            return false;
        }
        mb_substitute_character($this->subs_char);
        return mb_convert_encoding ($utf, $out_charset, 'UTF-8');
    }

    // -------------------------------------------------------------------
    // Parse() is private method used by Get() to load and parse RSS file.
    // Don't use Parse() in your scripts - use Get($rss_file) instead.
    //
    // Returns False when $rss_url cannot be read, otherwise an array with:
    //   'encoding'             - encoding declared in the document ('' if none)
    //   <channel tag>          - one key per non-empty tag of $channeltags
    //   'textinput_<tag>'      - non-empty tags of $textinputtags
    //   'image_<tag>'          - non-empty tags of $imagetags
    //   'items_count'          - number of <item> elements found
    //   'items'                - list of arrays, one per item, keyed by $itemtags
    // -------------------------------------------------------------------
    function Parse ($rss_url) {
        // Open and load RSS file
        $rss_content = @file_get_contents($rss_url);
        if ($rss_content === false) {
            // Error in opening return False
            return false;
        }

        $result = array();

        // Parse document encoding
        $result['encoding'] = $this->my_preg_match("'encoding=[\'\"](.*?)[\'\"]'si", $rss_content);

        // If code page is set convert character encoding to required
        if ($this->cp != '') {
            // XML without an encoding declaration is UTF-8 by default
            $in_charset = ($result['encoding'] != '') ? $result['encoding'] : 'UTF-8';
            $converted = $this->MyConvertEncoding($in_charset, $this->cp, $rss_content);
            // keep the unconverted document rather than losing the whole feed
            if ($converted !== false) $rss_content = $converted;
        }

        // Parse CHANNEL info
        $channel_content = '';
        if (preg_match("'<channel.*?>(.*?)</channel>'si", $rss_content, $out_channel)) {
            $channel_content = $out_channel[1];
        }
        foreach ($this->channeltags as $channeltag) {
            $temp = $this->my_preg_match("'<$channeltag.*?>(.*?)</$channeltag>'si", $channel_content);
            if ($temp != '') $result[$channeltag] = $temp; // Set only if not empty
        }

        // Parse TEXTINPUT info
        // This a little strange regexp means:
        // Look for tag <textinput> with or without any attributes, but skip
        // truncated version <textinput /> (it's not beginning tag)
        preg_match("'<textinput(|[^>]*[^/])>(.*?)</textinput>'si", $rss_content, $out_textinfo);
        if (!empty($out_textinfo[2])) {
            foreach ($this->textinputtags as $textinputtag) {
                $temp = $this->my_preg_match("'<$textinputtag.*?>(.*?)</$textinputtag>'si", $out_textinfo[2]);
                if ($temp != '') $result['textinput_'.$textinputtag] = $temp; // Set only if not empty
            }
        }

        // Parse IMAGE info
        preg_match("'<image.*?>(.*?)</image>'si", $rss_content, $out_imageinfo);
        if (!empty($out_imageinfo[1])) {
            foreach ($this->imagetags as $imagetag) {
                $temp = $this->my_preg_match("'<$imagetag.*?>(.*?)</$imagetag>'si", $out_imageinfo[1]);
                if ($temp != '') $result['image_'.$imagetag] = $temp; // Set only if not empty
            }
        }

        // Parse ITEMS
        preg_match_all("'<item(| .*?)>(.*?)</item>'si", $rss_content, $items);
        $rss_items = isset($items[2]) ? $items[2] : array();
        $result['items_count'] = count($rss_items);
        $result['items'] = array(); // create array even if there are no items
        foreach ($rss_items as $i => $rss_item) {
            // Parse one item
            $item = array();
            foreach ($this->itemtags as $itemtag) {
                $temp = $this->my_preg_match("'<$itemtag.*?>(.*?)</$itemtag>'si", $rss_item);
                if ($temp != '') $item[$itemtag] = $temp; // Set only if not empty
            }
            // Strip HTML tags (also entity-encoded ones) from DESCRIPTION, if present
            if (!empty($item['description'])) {
                $item['description'] = strip_tags($this->unhtmlentities(strip_tags($item['description'])));
            }
            // Items in which no known tag was found are left out, so the
            // index $i of the remaining items still matches their position
            if ($item) $result['items'][$i] = $item;
        }
        return $result;
    }
}
00183 
00184 ?>

Generated on Sun Oct 26 20:33:13 2008 for The Travelsized Content Management System by  doxygen 1.5.5