<?php
require_once('parserutils.class.php');
/**
 * Iterator over the <item> elements of a parsed RSS document.
 *
 * Every iteration step yields the array produced by
 * ParserUtils::constructRssItem() from the item's title, link,
 * description and pubDate children.
 */
class RSSParser implements Iterator {
    /** @var int Zero-based index of the item the iterator points at. */
    private $position = 0;

    /** @var array Item elements of the first channel, in document order. */
    private $rss = [];

    /**
     * Collects the channel's items into a zero-based list.
     *
     * @param SimpleXMLElement|false|null $rss Parsed feed root as returned by
     *        simplexml_load_string(); anything without channel/item children
     *        results in an empty iterator.
     */
    public function __construct($rss) {
        $this->position = 0;
        // Walk the children directly instead of casting the channel to an
        // array: the cast turns a lone <item> into a single object rather
        // than a one-element list, so single-item feeds used to look empty.
        if (isset($rss->channel->item)) {
            foreach ($rss->channel->item as $item) {
                $this->rss[] = $item;
            }
        }
    }

    /** Moves the iterator back to the first item. */
    #[\ReturnTypeWillChange]
    public function rewind() {
        $this->position = 0;
    }

    /**
     * Builds the normalized item array for the current position.
     *
     * @return array Result of ParserUtils::constructRssItem().
     */
    #[\ReturnTypeWillChange]
    public function current() {
        $item = $this->rss[$this->position];
        return ParserUtils::constructRssItem(
            $item->title,
            $item->link,
            $item->description,
            $item->pubDate
        );
    }

    /** @return int Index of the current item. */
    #[\ReturnTypeWillChange]
    public function key() {
        return $this->position;
    }

    /** Advances to the next item. */
    #[\ReturnTypeWillChange]
    public function next() {
        ++$this->position;
    }

    /** @return bool True while the current position refers to a collected item. */
    #[\ReturnTypeWillChange]
    public function valid() {
        return isset($this->rss[$this->position]);
    }

    /**
     * Tells whether a parsed document looks like an RSS feed with items.
     *
     * @param mixed $rss Parsed feed root, or a falsy value when parsing failed.
     * @return bool True when the document has a non-empty channel/item path.
     */
    public static function check($rss){
        return !(empty($rss) || empty($rss->channel) || empty($rss->channel->item));
    }
}
?>
<?php
/**
 * Iterator over the <entry> elements of a parsed Atom document.
 *
 * Every iteration step yields the array produced by
 * ParserUtils::constructRssItem() from the entry's title, chosen link,
 * content and updated children.
 */
class AtomParser implements Iterator {
    /** @var int Zero-based index of the entry the iterator points at. */
    private $position = 0;

    /** @var array Entry elements of the feed, in document order. */
    private $rss = [];

    /**
     * Collects the feed's entries into a zero-based list.
     *
     * @param SimpleXMLElement|false|null $rss Parsed feed root as returned by
     *        simplexml_load_string(); anything without entry children results
     *        in an empty iterator.
     */
    public function __construct($rss) {
        $this->position = 0;
        // Walk the children directly instead of casting the feed to an
        // array: the cast turns a lone <entry> into a single object rather
        // than a one-element list, so single-entry feeds used to look empty.
        if (isset($rss->entry)) {
            foreach ($rss->entry as $entry) {
                $this->rss[] = $entry;
            }
        }
    }

    /** Moves the iterator back to the first entry. */
    #[\ReturnTypeWillChange]
    public function rewind() {
        $this->position = 0;
    }

    /**
     * Builds the normalized item array for the current position.
     *
     * The link used is the first <link> whose type attribute is "text/html";
     * when none matches, the last <link> of the entry is used.
     *
     * @return array Result of ParserUtils::constructRssItem().
     */
    #[\ReturnTypeWillChange]
    public function current() {
        $entry = $this->rss[$this->position];

        $chosen = null;
        foreach ($entry->link as $candidate) {
            $chosen = $candidate;
            if ((string) $candidate['type'] === 'text/html') {
                break;
            }
        }
        // An entry without any <link> leaves $chosen null; fall back to an
        // empty href instead of indexing into null.
        $href = ($chosen === null) ? '' : (string) $chosen['href'];

        return ParserUtils::constructRssItem(
            $entry->title,
            $href,
            $entry->content,
            $entry->updated
        );
    }

    /** @return int Index of the current entry. */
    #[\ReturnTypeWillChange]
    public function key() {
        return $this->position;
    }

    /** Advances to the next entry. */
    #[\ReturnTypeWillChange]
    public function next() {
        ++$this->position;
    }

    /** @return bool True while the current position refers to a collected entry. */
    #[\ReturnTypeWillChange]
    public function valid() {
        return isset($this->rss[$this->position]);
    }

    /**
     * Tells whether a parsed document looks like an Atom feed with entries.
     *
     * @param mixed $rss Parsed feed root, or a falsy value when parsing failed.
     * @return bool True when the document has a non-empty entry child.
     */
    public static function check($rss){
        return !(empty($rss) || empty($rss->entry));
    }
}
?>
<?php
require_once('RollingCurl.php');
/**
 * Static helpers shared by the feed parsers: XML-to-array conversion,
 * feed item normalization and HTTP downloads through RollingCurl.
 */
class ParserUtils
{
    /** Static-only class: instantiation is blocked. */
    final private function __construct() {}

    // `final` is omitted here: PHP 8 warns about final private methods other
    // than the constructor, and a private method cannot be overridden anyway.
    private function __clone() {}

    /** @var array cURL options passed along with every request issued by this class. */
    public static $curlOpt = [CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.52 Safari/537.17"];

    /**
     * Converts an object into an array, recursing into members that are
     * themselves objects.
     *
     * Members that are not objects (scalars, and arrays such as lists of
     * repeated child elements) are copied through untouched.
     *
     * @param mixed $xmlObject Value to convert; it is cast to array first.
     * @return array
     */
    public static function normalizeXML ($xmlObject)
    {
        $res = [];
        foreach ( (array) $xmlObject as $index => $node ){
            $res[$index] = ( is_object ( $node ) ) ? self::normalizeXML ( $node ) : $node;
        }
        return $res;
    }

    /**
     * Builds the normalized representation of one feed item.
     *
     * @param mixed $title   Item title; tags are stripped and the text trimmed.
     * @param mixed $link    Item link; tags are stripped and the text trimmed.
     * @param mixed $content Item body; trimmed, with "[crayon-...]" markers removed.
     * @param mixed $update  Date text understood by strtotime().
     * @return array Keys 'content', 'update' (formatted "Y-m-d H:i:s"), 'title', 'link'.
     */
    public static function constructRssItem($title, $link, $content, $update){
        // Arguments may be SimpleXMLElement nodes or null; cast explicitly so
        // the string functions below never receive a non-string.
        $body = trim((string) $content);
        $cleaned = preg_replace('/\[crayon-.+\]/U', '', $body);
        if ($cleaned === null) {
            // preg_replace() returns null on a PCRE failure; keep the
            // unfiltered text rather than losing the content.
            $cleaned = $body;
        }

        $timestamp = strtotime((string) $update);
        if ($timestamp === false) {
            // Unparseable or missing date: keep formatting the epoch, which
            // is what passing false to date() resulted in before.
            $timestamp = 0;
        }

        return [
            'content' => $cleaned,
            'update' => date('Y-m-d H:i:s', $timestamp),
            'title' => trim(strip_tags((string) $title)),
            'link' => trim(strip_tags((string) $link)),
        ];
    }

    /**
     * Downloads several URLs through one RollingCurl batch.
     *
     * @param array $urls        URLs to fetch.
     * @param int   $threadCount RollingCurl window size; values <= 0 mean
     *                           "as many as there are URLs".
     * @return array Response bodies keyed by request URL; only non-empty
     *               responses with HTTP status 200 are included.
     */
    public static function multiDownLoad($urls, $threadCount = 0){
        $result = [];
        if (empty($urls)) {
            // Nothing to fetch: skip configuring a zero-size window and
            // executing an empty batch.
            return $result;
        }
        if ($threadCount <= 0) {
            $threadCount = count($urls);
        }
        $rc = new RollingCurl(function($response, $info, $request) use (&$result) {
            if ($info["http_code"] == 200 && !empty($response)) {
                $result[$request->url] = $response;
            }
        });
        $rc->window_size = $threadCount;
        foreach ($urls as $url) {
            $rc->get($url, null, self::$curlOpt);
        }
        $rc->execute();
        return $result;
    }

    /**
     * Downloads a single URL.
     *
     * @param string $url URL to fetch.
     * @return mixed Response body, or null unless the response was non-empty
     *               with HTTP status 200.
     */
    public static function download($url){
        $result = null;
        $rc = new RollingCurl(function($response, $info, $request) use (&$result) {
            if ($info["http_code"] == 200 && !empty($response)) {
                $result = $response;
            }
        });
        $rc->get($url, null, self::$curlOpt);
        $rc->execute();
        return $result;
    }
}
?>
/**
 * Picks the parser matching a raw feed document.
 *
 * @param mixed $raw_content XML text of the feed; an empty value (for example
 *        the null that ParserUtils::download() returns on failure) is
 *        treated as "no feed".
 * @return RSSParser|AtomParser|null RSSParser when the document passes
 *         RSSParser::check(), AtomParser when it passes AtomParser::check(),
 *         null otherwise.
 */
public static function getParser($raw_content){
    // Empty input can never parse; returning early also avoids handing null
    // to simplexml_load_string(), which expects a string.
    if (empty($raw_content)) {
        return null;
    }

    // libxml warnings and errors are suppressed: malformed feeds simply
    // produce false, which both check() methods reject.
    $rss = simplexml_load_string($raw_content, 'SimpleXMLElement', LIBXML_NOWARNING | LIBXML_NOERROR);

    if (RSSParser::check($rss)) {
        return new RSSParser($rss);
    }
    if (AtomParser::check($rss)) {
        return new AtomParser($rss);
    }
    return null;
}