Здравствуйте.
Данный сценарий парсил страницы с видео на iz.ru:
# Perform the HTTP request through cURL when the extension is available.
# Uses variables supplied by the enclosing scope (not visible in this fragment):
#   $url, $method, $pole (query string / POST payload), $follow, $userAgent,
#   $cookies (null or key => value pairs) and $module.
# Returns the trimmed response body, or an empty string when the transfer fails.
if (extension_loaded('curl') && function_exists('curl_init'))
{
    # For GET requests the parameters travel in the URL; pick the separator
    # depending on whether the URL already carries a query string.
    $requestUrl = $url;
    if ($method == 'GET' && !empty($pole))
    {
        $separator = (strpos($url, '?') === false) ? '?' : '&';
        $requestUrl = $url . $separator . ltrim($pole, '&');
    }

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $requestUrl);
    # NOTE(review): TLS verification is switched off, which leaves the request
    # open to man-in-the-middle attacks. Kept as in the original; re-enable
    # once a CA bundle is configured on the server.
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    # Empty string = advertise every encoding libcurl supports and decode it.
    curl_setopt($curl, CURLOPT_ENCODING, '');
    curl_setopt($curl, CURLOPT_HEADER, false);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, $follow);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);

    # Anything that is not GET is sent as a POST with $pole as the body
    if ($method != 'GET')
    {
        curl_setopt($curl, CURLOPT_POST, true);
        curl_setopt($curl, CURLOPT_POSTFIELDS, $pole);
    }

    # The User-Agent is sent through CURLOPT_USERAGENT only. The bare
    # $userAgent string must not be pushed into the header list: without a
    # "User-Agent: " field name it is not a valid header line.
    curl_setopt($curl, CURLOPT_USERAGENT, $userAgent);
    $headers = [
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',
        'Connection: keep-alive',
        'Upgrade-Insecure-Requests: 1',
        'Sec-Fetch-Dest: document',
        'Sec-Fetch-Mode: navigate',
        'Sec-Fetch-Site: none',
        'Sec-Fetch-User: ?1',
        'Pragma: no-cache',
        'Cache-Control: no-cache',
    ];
    curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);

    # Forward the supplied cookies, if any
    if ($cookies !== null)
    {
        $pairs = [];
        foreach ($cookies as $key => $value)
        {
            # The PHPSESSID cookie is deliberately not forwarded
            if ($key == 'PHPSESSID')
            {
                continue;
            }
            $pairs[] = $key . '=' . $value;
        }
        # Same normalisation as before: strip trailing "; " and outer whitespace
        $line = trim(rtrim(implode('; ', $pairs), '; '));
        curl_setopt($curl, CURLOPT_COOKIE, $line);
    }

    # Resolve the page charset before running the transfer (same order as before)
    $charset = $this->getCharset($url);

    # Run the transfer once and release the handle
    $data = curl_exec($curl);
    curl_close($curl);

    # curl_exec() returns false on failure; never feed that to iconv()/trim()
    if ($data === false)
    {
        return '';
    }

    # Re-encode pages that are not UTF-8 (compared case-insensitively), except
    # for the "video" module. The source encoding is assumed to be CP1251.
    if (isset($charset) && strcasecmp((string) $charset, 'UTF-8') !== 0 && $module != 'video')
    {
        $data = iconv('CP1251', 'UTF-8', $data);
    }

    # iconv() may itself return false; the cast keeps the return value a string
    return trim((string) $data);
}
С некоторых пор сценарий натыкается на страницу проверки на бота/человека. В итоге необходимые операции не выполняются.
Подскажите, пожалуйста, как можно обойти страницу проверки на бота?
Спасибо.