HTML
- 2 ответа
- 0 вопросов
1
Вклад в тег
bumbaram@home-tower:~$ curl -i http://google.com
HTTP/1.1 302 Found
Cache-Control: private
Content-Type: text/html; charset=UTF-8
Location: http://www.google.ru/?gfe_rd=cr&ei=EPmeVPyRK6Or8wf5-IDABA
Content-Length: 258
Date: Sat, 27 Dec 2014 18:23:12 GMT
Server: GFE/2.0
Alternate-Protocol: 80:quic,p=0.002
<HTML><HEAD><meta http-equiv="content-type" content="text/html;charset=utf-8">
<TITLE>302 Moved</TITLE></HEAD><BODY>
<H1>302 Moved</H1>
The document has moved
<A HREF="http://www.google.ru/?gfe_rd=cr&ei=EPmeVPyRK6Or8wf5-IDABA">here</A>.
</BODY></HTML>
bumbaram@home-tower:~/Projects/htmlparsing$ git init
bumbaram@home-tower:~/Projects/htmlparsing$ git submodule add https://github.com/lagner/QGumboParser.git lib
Cloning into 'QGumboParser'...
remote: Counting objects: 96, done.
remote: Total 96 (delta 0), reused 0 (delta 0)
Unpacking objects: 100% (96/96), done.
Checking connectivity... done.
bumbaram@home-tower:~/Projects/htmlparsing$ git submodule update --init --recursive
#include <exception>

#include <QCoreApplication>
#include <QDebug>
#include <QNetworkAccessManager>
#include <QNetworkRequest>
#include <QNetworkReply>
#include <qgumbodocument.h>
#include <qgumbonode.h>
// Forward declarations: both functions are defined after main(), but
// requestFinished is referenced there when the signal is connected.
// Handler connected to QNetworkAccessManager::finished in main().
void requestFinished(QNetworkReply*);
// Parses the given HTML text and logs the inner text of every <title> element.
void parseHtml(QString html);
int main(int argc, char *argv[])
{
QCoreApplication a(argc, argv);
QNetworkAccessManager nm;
QObject::connect(&nm, &QNetworkAccessManager::finished, requestFinished);
nm.get(QNetworkRequest(QStringLiteral("http://toster.ru/q/168437")));
return a.exec();
}
/// Handles a finished network reply.
///
/// On success the whole body is read, decoded as UTF-8 and passed to
/// parseHtml(); on failure the reply's error string is logged. In both
/// cases the reply is scheduled for deletion and the application is asked
/// to quit.
///
/// @param rep  The reply delivered by the finished signal.
void requestFinished(QNetworkReply* rep) {
    const bool failed = rep->error() != QNetworkReply::NoError;
    if (failed) {
        qDebug() << "request failed: " << rep->errorString();
    } else {
        // Decode the raw bytes as UTF-8 before handing them to the parser.
        parseHtml(QString::fromUtf8(rep->readAll()));
    }

    // Cleanup and shutdown happen regardless of the outcome above.
    rep->deleteLater();
    QCoreApplication::quit();
}
/// Parses an HTML document and logs the inner text of each <title> element.
///
/// The text is parsed with QGumboDocument::parse(); every node returned by
/// getElementsByTagName(HtmlTag::TITLE) on the root node is printed via
/// qDebug(). No exception escapes this function: failures are reported
/// through qCritical() instead.
///
/// @param html  The HTML text to parse (taken by value to match the forward
///              declaration earlier in the file).
void parseHtml(QString html) {
    try {
        QGumboDocument doc = QGumboDocument::parse(html);
        QGumboNode root = doc.rootNode();
        auto nodes = root.getElementsByTagName(HtmlTag::TITLE);
        for (auto& node : nodes) {
            qDebug() << "title: " << node.innerText();
        }
    } catch (const std::exception& ex) {
        // Keep the diagnostic text instead of discarding it, so a parse
        // failure can actually be investigated from the log.
        qCritical() << "smth wrong" << ex.what();
    } catch (...) {
        // Fallback for anything not derived from std::exception.
        qCritical() << "smth wrong";
    }
}