Неужели дело в запросах в цикле к БД ? Скрипт работает ~1.20 минут.
Извиняйте за кучу кода.
<?php
set_time_limit(0);
require 'simple_html_dom.php';
require 'stemmer.php';
$mysqli = mysqli_connect("localhost", "root", "",
"***") or die('Error (' . mysqli_connect_errno() . ') ' . mysqli_connect_error());
mysqli_set_charset($mysqli, "utf8");
$sql = "SELECT * FROM indexing_link";
$result = mysqli_query($mysqli, $sql) or die('Error_ (' . mysqli_errno($mysqli) . ') ' . mysqli_error($mysqli));
// /**
// * Проходим по всем сохраненным файлам и вытаскиваем title/urls/words/short
// */
// while ($row = mysqli_fetch_assoc($result)) {
// $current_url = 'C:/Users/***/Downloads/1/1' . substr($row['url'], 30) . '.html';
$current_url = 'Hayden_Scott-Baron.html';
$current_url_ = 'Hayden_Scott-Baron';
$html = file_get_html($current_url);
$current_url = 'http://minecraft.gamepedia.com/' . $current_url_;
// var_dump($current_url);
// var_dump(file_exists($current_url));
// var_dump($html);
// if ($html === false) {
// continue;
// }
$title = $html->find('title', 0)->plaintext;
$text = $html->find('#bodyWrapper', 0)->plaintext;
$short = substr($html->find('#mw-content-text > p:nth-child(3)', 0)->plaintext, 0, 300) . "...";
$short = preg_replace("/\s{2,}/", ' ', $short);
$short = trim($short);
$short = addslashes($short);
$links = array();
foreach ($html->find('#bodyWrapper a[href^=/]') as $url) {
$links[] = 'http://minecraft.gamepedia.com' . htmlspecialchars_decode($url->href);
}
$text = trim($text);
$text = preg_replace("/[^a-z]+/msi", ' ', $text);
$words = explode(' ', $text);
foreach ($words as $key => &$value) {
$value = preg_replace("/\s{2,}/", ' ', $value);
$value = trim($value);
if (strlen($value) <= 2) {
unset($words[$key]);
}
}
$words = array_unique($words);
sort($words);
//Проверка
// echo $title . "<br>";
// echo $short . "<br>";
// var_dump($links);
// echo "<pre>";
// print_r($words);
// echo "</pre><br>";
/**
* Ищем id url`а с которым работаем
*/
$sql = "SELECT id FROM indexing_link WHERE url='" . $current_url . "'";
$q = mysqli_query($mysqli, $sql) or die('Error_ 1(' . mysqli_errno($mysqli) . ') ' . mysqli_error($mysqli));
$row = mysqli_fetch_assoc($q);
$id_current_url = $row['id'];
mysqli_free_result($q);
/**
* Пишем title и short страницы в бд
*/
$sql = "UPDATE indexing_link SET title='" . $title . "', short='" . $short . "' WHERE id=" . $id_current_url;
var_dump($sql);
mysqli_query($mysqli, $sql) or die('Error_ 2(' . mysqli_errno($mysqli) . ') ' . mysqli_error($mysqli));
/**
* Пишем пару откуда-куда в бд
*/
$links_id = array();
foreach ($links as $value) {
$sql = "SELECT id FROM indexing_link WHERE url='" . $value . "'";
$q = mysqli_query($mysqli, $sql) or die('Error_ 3(' . mysqli_errno($mysqli) . ') ' . mysqli_error($mysqli));
$row = mysqli_fetch_assoc($q);
$links_id[] = $row['id'];
mysqli_free_result($q);
}
$sql = "INSERT INTO `indexing_how_where` (`how`, `where`) VALUES ";
foreach ($links_id as $value) {
if ($value != null && $id_current_url != $value) {
$sql .= "('" . $id_current_url . "', '" . $value . "'), ";
}
}
$sql = rtrim($sql, ', ');
mysqli_query($mysqli, $sql) or die('Error_ 4(' . mysqli_errno($mysqli) . ') ' . mysqli_error($mysqli));
/**
* Пишем слова в бд
*/
foreach ($words as $value) {
$value = PorterStemmer::Stem($value); // Используем класс PorterStemmer для стемминга слова
$sql = "SELECT id FROM indexing_word WHERE word='" . $value . "'";
$q1 = mysqli_query($mysqli, $sql) or die('Error_ 5(' . mysqli_errno($mysqli) . ') ' . mysqli_error($mysqli));
$row = mysqli_fetch_assoc($q1);
if (mysqli_num_rows($q1) == 0) {
$sql = "INSERT INTO indexing_word (word) VALUES ('" . $value . "')";
mysqli_query($mysqli, $sql) or die('Error_ 6(' . mysqli_errno($mysqli) . ') ' . mysqli_error($mysqli));
$sql = "SELECT id FROM indexing_word WHERE word='" . $value . "'";
$q2 = mysqli_query($mysqli, $sql) or die('Error_ 7(' . mysqli_errno($mysqli) . ') ' . mysqli_error($mysqli));
$row = mysqli_fetch_assoc($q2);
$word_id = $row['id'];
$sql = "INSERT INTO indexing_link_word (word_id, link_id) VALUES (" . $word_id . ", " . $id_current_url . ")";
mysqli_query($mysqli, $sql) or die('Error_ 8(' . mysqli_errno($mysqli) . ') ' . mysqli_error($mysqli));
mysqli_free_result($q2);
} else {
$word_id = $row['id'];
$sql = "INSERT INTO indexing_link_word (word_id, link_id) VALUES (" . $word_id . ", " . $id_current_url . ")";
mysqli_query($mysqli, $sql) or die('Error_ 9(' . mysqli_errno($mysqli) . ') ' . mysqli_error($mysqli));
}
mysqli_free_result($q1);
}
// }
mysqli_free_result($result);
mysqli_close($mysqli);