User-agent: *
Disallow: /
// Read yandex.html and run it through Tidy (XML output, numeric entities)
// so that the result can be handed to SimpleXML.
$html = file_get_contents( 'yandex.html' );

$tidyOptions = array(
    'output-xml' => true,
    'clean' => true,
    'numeric-entities' => true,
);

$cleaner = new tidy();
$cleaner->parseString( $html, $tidyOptions, 'utf8' );
$cleaner->cleanRepair();

$document = simplexml_load_string( tidy_get_output( $cleaner ) );

// Dump every element whose class attribute equals "b-adv", then stop.
$advBlocks = $document->xpath( '//*[@class="b-adv"]' );
var_dump( $advBlocks );
exit;

// NOTE: the exit above ends the script, so nothing below this line runs.
// Dump every element whose id attribute equals "tads", then stop.
$tadsNodes = $document->xpath( '//*[@id="tads"]' );
var_dump( $tadsNodes );
exit;

// Print the href of every link whose target contains "start=".
$anchors = $document->xpath( '//a[@href]' );
foreach ( $anchors as $anchor ) {
    $target = $anchor->attributes()->href;
    if ( strpos( $target, 'start=' ) === false ) {
        continue;
    }
    echo $target."\n";
}
<source lang="php">
<?php
// Read yandex.html, repair it with Tidy into well-formed XML, and query the
// result with XPath through SimpleXML.
$s = file_get_contents( 'yandex.html' );
if ( $s === false ) {
    // Fail with a clear message instead of feeding `false` into Tidy.
    throw new RuntimeException( 'Unable to read yandex.html' );
}

$tidy = new tidy();
$tidy->parseString( $s, array(
    'output-xml' => true,
    'clean' => true,
    'numeric-entities' => true
), 'utf8' );
$tidy->cleanRepair();

$xml = simplexml_load_string( tidy_get_output( $tidy ) );
if ( $xml === false ) {
    // Without this check the xpath() call below is a fatal error on bool.
    throw new RuntimeException( 'Tidy output could not be parsed as XML' );
}

// XPath attribute tests need the "@" axis: a bare name such as class="b-adv"
// tests a child ELEMENT called "class" and therefore never matches here.
$adwords = $xml->xpath( '//*[@class="b-adv"]' );
var_dump( $adwords );
// NOTE(review): this exit looks like a debugging stop; it makes everything
// below unreachable. Confirm whether it is still wanted.
exit;

$tads = $xml->xpath( '//*[@id="tads"]' );
var_dump( $tads );
exit;

// Print the href of every link whose target contains "start=".
$a = $xml->xpath( '//a[@href]' );
array_walk( $a, function( $item ) {
    // Cast explicitly: attributes()->href is a SimpleXMLElement, not a string.
    $href = (string) $item->attributes()->href;
    if ( strpos( $href, 'start=' ) !== false ) {
        echo $href."\n";
    }
} );
?>
</source>