/*
instantRSS.php
version : 1.0 (2003.04.09)
copyright : ben wilson 2003
email : ben[at]thelocust[dot]org
site : http://thelocust.org/projects/instantRSS.php
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
(or you can peep it here: http://www.gnu.org/licenses/gpl.txt)
*/
/* DESCRIPTION */
/* instantRSS is a simple RSS creator for folks who don't have RSS capability */
/* for their site/blog/whatever. As long as a page is marked up appropriately*/
/* with the tags, then instantRSS can spit out an RSS */
/* file! */
/* instantRSS can be used two ways: remotely or locally. both methods of */
/* usage will return the same output. */
/* Remote usage implies that the instantRSS script resides on a server that */
/* does not host your site/blog. This is handy for folks that don't have any */
/* type of scripting on their hosts, or just don't want to fool with it. If */
/* you'd like to use thelocust.org's instantRSS script to generate your RSS */
/* then check out instantRSS's site listed above for the url to use */
/* Local usage implies that the instantRSS script resides on the same server */
/* that hosts your site, and that instantRSS script is used *only* for your */
/* instantRSS marked pages. */
/* In either case, to give instantRSS a URL to parse: */
/* http://yourblog.com/instantRSS.php?url=http://yourblog.com/index.html */
/* just specify the page to get the RSS from in the url= variable in the URL */
/* (replacing yourblog.com with the name of the server with instantRSS on it) */
/* instantRSS markup tags */
/* i'm not going to explain RSS here, because i assume you already know what */
/* all that is about. http://voidstar.com/rssfaq should provide more info. */
/* all instantRSS tags are in this format: where */
/* xxxxx is the name of the tag. */
/* example instantRSS markup */
/*
Ben's Blog
http://thelocust.org
the blog of ben wilson
February 11, 2003
Man, I've got SO MUCH news today! The sun is shining!
*/
/* instantRSS will use whatever is in between the two corresponding tags */
/* as the value for that tag. for instance, the channel title in the above */
/* example is "Ben's Blog". instantRSS will also take values in HTML comments*/
/* as seen in the item link tag in the example above. This allows you to hide*/
/* values so that they are not seen on your actual site. This may be done with*/
/* ANY valid instantRSS tag. When outputting that hidden value, the header and footer will be removed automagically. */
/* instantRSS TAGS */
/* CHANNEL TAGS */
/* title - (beginchantitle / endchantitle). if instantRSS fails to find this */
/* tag, it will attempt to find and use the
tag which is */
/* found in the of most pages. */
/* link - (beginchanlink / endchanlink). if instantRSS fails to find this */
/* tag, it will use the link passed to it via the URL variable. */
/* desc - (beginchandesc / endchandesc). if instantRSS fails to find this tag*/
/* it will attempt to use the META description tag found on many pages in */
/* in the . e.g.: */
/* */
/* ITEM TAGS */
/* item - (beginitem / enditem). Note that each item must begin and end with */
/* the corresponding tag. all item sub-tags must be within the beginning */
/* and ending item tags */
/* item title - (begintitle / endtitle) */
/* item link - (beginlink / endlink) */
/* item desc - (begindesc / enddesc) */
/* NOTES */
/* HTML will be stripped out of ALL tag values, unless specified through the */
/* HTML variable as such: */
/* http://yoursite.com/instantRSS.php?url=http://thelocust.org&html=1 */
/* There are 3 possible values for HTML=: */
/* 0 - no HTML in RSS output */
/* 1 - any and all HTML in RSS */
/* 2 - escaped HTML in RSS (spaces become %20, etc) */
/* HOW TO LINK TO IT */
/* http://yourserver.com/instantRSS.php?url=http://yourserver.com/index.html */
/* HACKING NOTES */
/* You want to hack on this and send me a patch? AWESOME. Send your changes */
/* (in a patch or as the whole file) to ben[at]thelocust[dot]org */
/* Please use spaces instead of tabs when indenting. Tabs are the work of */
/* the devil. */
/* CONFIGURATION */
/* URL RESTRICTION */
/* set $bRestrictURL to 1 to restrict instantRSS to ONLY parsing URLs from the */
/* $arrValidURL list. If $bRestrictURL is 0, then whatever is passed to instant */
/* RSS in the URL variable will be parsed. use at your own risk!! */
$bRestrictURL = 0;
/* set arrValidURL to the host names that instantRSS is allowed to accept. use */
/* commas to delimit. i.e. array("thelocust.org","ae4rv.com"); */
$arrValidURL = array("thelocust.org","ae4rv.com");
/* set iHTMLLevel to set the level of HTML compatibility. */
/* 0 - no HTML in RSS output */
/* 1 - any and all HTML in RSS */
/* 2 - escaped HTML in RSS (spaces become %20, etc) */
/* NOTE: setting this variable will override any value passed in the URL. */
/* leave it commented out (with "//") to not set it. */
//$iHTMLLevel = 0;
/* Resolve the effective HTML level. A value configured above always wins; */
/* otherwise the html= request variable is used when it is one of the three */
/* documented levels, and anything else (missing, non-scalar, out of range) */
/* falls back to 0 so that HTML is stripped by default. */
if (!isset($iHTMLLevel)) {
    $iHTMLLevel = 0;
    if (isset($_GET['html']) && is_scalar($_GET['html'])) {
        $iRequestedLevel = (int) $_GET['html'];
        if ($iRequestedLevel >= 0 && $iRequestedLevel <= 2)
            $iHTMLLevel = $iRequestedLevel;
    }
}
/* URL VALIDATION */
/* Reads the url= request variable into $URL and raises $bFailure (printing a */
/* message) when the request cannot be used. $bFailure is initialised here so */
/* that the check which follows never reads an undefined variable. */
$bFailure = FALSE;
if (isset($_GET['url']) && is_string($_GET['url'])) {
    $URL = $_GET['url'];
    /* the URL is echoed back into an HTML page, so escape it before printing */
    /* to keep a crafted url= value from injecting markup or script */
    $sSafeURL = htmlspecialchars($URL, ENT_QUOTES);
    if (substr($URL,0,7) != 'http://') {
        $bFailure = TRUE;
        echo " The URL you have provided '$sSafeURL' does not start with http://, and it should!";
    }
    else {
        /* the host is everything between "http://" and the next slash */
        $tmp = explode("/",substr($URL,7));
        /* strict comparison: the host must match a list entry exactly */
        if ( $bRestrictURL == 1 && !in_array($tmp[0],$arrValidURL,TRUE) ) {
            $bFailure = TRUE;
            echo " The URL specified ($sSafeURL) is not in the list of valid server URLs!";
        }
    }
}
else {
    /* nothing usable was passed in: fail here instead of trying to read an */
    /* empty path later on */
    $URL = "";
    $bFailure = TRUE;
    echo " No URL was specified! Pass the page to parse in the url= variable.";
}
// Main flow: when validation flagged a failure, print the summary message;
// otherwise fetch $URL, pull the channel and item values out of the page,
// and emit them as the feed.
// NOTE(review): in this copy of the file the delimiter strings passed to
// explode()/split() are empty, one loop header is cut short and the output
// template has lost its markup - presumably removed by a tag-stripping
// extraction. Confirm against the upstream source before changing any code.
if ($bFailure)
echo "
FAILURE! Some sort of horribly catastrophic failure has occured. See the above error messages for details.";
else {
//read into an array lines from the URL
$file = file($URL);
//make sure the file read was successfull
//(a FALSE return and an empty array are both falsy, so either one takes
//the error branch)
if (!$file)
echo("The URL you have given me either contains no data or could not be read. ".
"Click here to go to the URL you specified.");
else {
//glue the lines back into a single string so it can be split on the tags
$file = implode("",$file);
//names of the per-item sub-tags; the position in this list is also the
//index each value is stored under in $items[n]
$itemtags = array('title','link','desc');
$items = array();
//first up, split the page using the begin item tag as a delimiter
//thus, we will have an array with the beginning of the page as index 0,
//the first item at 1...
$arrfile = explode('',$file);
//find CHANNEL TITLE
//two-step extraction: keep the text after the first delimiter, then the
//text before the second delimiter
$tmp = split("",$arrfile[0]);
$tmp = split("",$tmp[1]);
if (strlen($tmp[0]))
$channeltitle = stripHTML($tmp[0]);
else {
//fallback: repeat the two-step split on the page header with a second
//pair of delimiters (presumably the page title tag described in the
//file header - the delimiter text is missing here, TODO confirm)
$tmp = split("",$arrfile[0]);
$tmp = split("",$tmp[1]);
$channeltitle = stripHTML($tmp[0]);
}
//find CHANNEL DESC
$tmp = split("",$arrfile[0]);
$tmp = split("",$tmp[1]);
if (strlen($tmp[0]))
$channeldesc = stripHTML($tmp[0]);
else {
//NOTE(review): unlike the title fallback, this re-splits $tmp[1] from the
//failed attempt instead of starting again from $arrfile[0] - verify that
//this is intended
$tmp = split("",$tmp[1]);
$channeldesc = stripHTML($tmp[0]);
}
//find CHANNEL LINK
$tmp = split("",$arrfile[0]);
$tmp = split("",$tmp[1]);
if (strlen($tmp[0]))
$channellink = stripHTML($tmp[0]);
else
//no channel link in the page: fall back to the URL that was requested
$channellink = stripHTML($URL);
//for each item in the array, starting with index of 1
//NOTE(review): the <= bound lets $i reach sizeof($arrfile), which is one
//past the last index of $arrfile - looks like an off-by-one, confirm
for ($i=1;$i<=sizeof($arrfile);$i++) {
$items[$i] = array();
//take the item, and split it using the enditem tag as delimiter
//this will give us an array with the actual item in index 0, and the leftover
//crap in index 1
$item = explode('',$arrfile[$i]);
//set the item to the item text in index 0
$item = $item[0];
//for each tag in "itemtags"
for ($x=0;$x",$item);
//if we didn't find a begin title tag, then set the title to item #X
//else splie the title+restofitem using the endtitle tag!
if ( sizeof($arritem) <= 1 ) {
if ($itemtags[$x] == 'link')
$items[$i][$x] = $URL;
elseif ($itemtags[$x] == 'title')
$items[$i][$x] = "item #$i";
elseif ($itemtags[$x] == 'desc')
$items[$i][$x] = "no description";
} //if sizeof
else {
$tmp = explode("",$arritem[1]);
$items[$i][$x] = stripHTML($tmp[0]);
} //if sizeof else
} //for sizeof itemtags
} //for each newsitem
/* START OUTPUT */
//print the XML header so the browser or whatever is picking this up
//will know wtf it is!
header ("content-type: text/xml");
print '';
?>
=$channeltitle?>
=$channellink?>
=$channeldesc?>
//for each item
for($j=1;$j=$items[$j][0]?>
=$items[$j][1]?>
=$items[$j][2]?>
} //end for items
?>
} //if file was good
} // if a URL has been specified
function stripHTML($string) {
global $iHTMLLevel;
//escape HTML
if ($iHTMLLevel == 2) //escapeHTML
$string = htmlentities($string);
elseif ($iHTMLLevel == 1) //leave HTML alone
$string = $string;
elseif ($iHTMLLevel == 0 || !isset($iHTMLLevel)) //strip all HTML
$string = strip_tags($string);
//return a trimmed, un-HTML-commented string
return trim(str_replace("-->","",str_replace("