// Scratch script: load a saved search-results page, let Tidy repair it into
// well-formed XML, then probe the document with a few XPath queries.

$s = file_get_contents( 'yandex.html' );
if ( $s === false ) {
    throw new RuntimeException( 'Unable to read yandex.html' );
}

// Real-world HTML is rarely well-formed, so it is run through Tidy before
// being handed to SimpleXML.
$tidy = new tidy();
$tidy->parseString(
    $s,
    array(
        'output-xml'       => true,
        'clean'            => true,
        'numeric-entities' => true,
    ),
    'utf8'
);
$tidy->cleanRepair();

$xml = simplexml_load_string( tidy_get_output( $tidy ) );
if ( $xml === false ) {
    throw new RuntimeException( 'Tidy output could not be parsed as XML' );
}

// XPath attribute tests need the "@" axis: [class="x"] would look for a child
// *element* named "class", which never matches an HTML attribute.
$adwords = $xml->xpath( '//*[@class="b-adv"]' );
var_dump( $adwords );

$tads = $xml->xpath( '//*[@id="tads"]' );
var_dump( $tads );

// Print every link whose href carries a "start=" parameter.
$a = $xml->xpath( '//a[@href]' );
if ( is_array( $a ) ) {
    foreach ( $a as $item ) {
        // attributes()->href is a SimpleXMLElement; cast it before string work.
        $href = (string) $item->attributes()->href;
        if ( strpos( $href, 'start=' ) !== false ) {
            echo $href."\n";
        }
    }
}