Использую PHP Simple HTML DOM Parser, для парсинга gorko.ru, но получаю ошибки 404 при парсинге страниц с людьми, которые предоставляют определенные услуги, а последняя парочка ссылок вовсе искажены и без приставки "
vinnitsa.gorko.ru", попробоавил прикрутить idna_convert, но он ничем не помог, совсем ни чем. Как исправить эту проблему?
Warning: file_get_contents(http://vinnitsa.gorko.ru/фотографы/): failed to open stream: HTTP request failed! HTTP/1.1 404 Not Found in /root/parsing/lib/dom.php on line 75
Warning: file_get_contents(http://vinnitsa.gorko.ru/видеографы/): failed to open stream: HTTP request failed! HTTP/1.1 404 Not Found in /root/parsing/lib/dom.php on line 75
Warning: file_get_contents(http://vinnitsa.gorko.ru/тамада/): failed to open stream: HTTP request failed! HTTP/1.1 404 Not Found in /root/parsing/lib/dom.php on line 75
Warning: file_get_contents(http://vinnitsa.gorko.ru/оформители/): failed to open stream: HTTP request failed! HTTP/1.1 404 Not Found in /root/parsing/lib/dom.php on line 75
Warning: file_get_contents(http://vinnitsa.gorko.ru/артисты/): failed to open stream: HTTP request failed! HTTP/1.1 404 Not Found in /root/parsing/lib/dom.php on line 75
Warning: file_get_contents(/%D0%BB%D0%B8%D0%BC%D1%83%D0%B7%D0%B8%D0%BD%D1%8B/): failed to open stream: No such file or directory in /root/parsing/lib/dom.php on line 75
Warning: file_get_contents(/%D0%B0%D0%B2%D1%82%D0%BE%D0%BC%D0%BE%D0%B1%D0%B8%D0%BB%D0%B8/): failed to open stream: No such file or directory in /root/parsing/lib/dom.php on line 75
Warning: file_get_contents(/%D1%80%D0%B5%D1%81%D1%82%D0%BE%D1%80%D0%B0%D0%BD%D1%8B/): failed to open stream: No such file or directory in /root/parsing/lib/dom.php on line 75
Warning: file_get_contents(/%D1%81%D0%B2%D0%B0%D0%B4%D0%B5%D0%B1%D0%BD%D1%8B%D0%B5+%D0%BF%D0%BB%D0%B0%D1%82%D1%8C%D1%8F/): failed to open stream: No such file or directory in /root/parsing/lib/dom.php on line 75
<?php
require_once 'lib/dom.php';
include_once 'lib/idna_convert.class.php';
$city = $_GET['city'];
$html = file_get_html('http://'.$city.'.gorko.ru/');
$ul = $html->find('ul[class=indexList]', 0);
$mas = array('');
$str = array('');
$i = 0;
foreach ($ul->find('a') as $url) {
$mas[$i] = $url->href;
$i++;
}
$i = 0;
unset($ul, $html);
$converter = new idna_convert();
foreach ($mas as $arr) {
$domain = parse_url($arr, PHP_URL_HOST);
$encoded_domain = $converter->encode($domain);
$arr = str_replace($domain, $encoded_domain, $arr);
$str[$i] = file_get_html($arr);
$i++;
}
?>