@grezzzle

Scrapy + Ubuntu VPS returns wrong HTML?

Сделал паука на Scrapy 1.5.1, на персональном компьютере он работает корректно. Перенёс паука на VPS. Versions: lxml 4.2.4.0, libxml2 2.9.8, cssselect 1.0.3, parsel 1.5.0, w3lib 1.19.0, Twisted 18.7.0, Python 3.5.2 (default, Nov 23 2017, 16:37:01) - [GCC 5.4.0 20160609], pyOpenSSL 18.0.0 (OpenSSL 1.1.0i 14 Aug 2018), cryptography 2.3.1, Platform Linux-4.4.0-133-generic-x86_64-with-Ubuntu-16.04-xenial

Паук работает через прокси - https://github.com/aivarsk/scrapy-proxies

Проблема следующая: при попытке обхода с VPS страниц сайта вида example.com/catalog/*** Scrapy возвращает некорректный HTML.

>>> response.body                                                                                                                                                               
b'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">\n<html><table id="id"><tr><td data-x9fe8b6=e7ef1></td><td data-x0aba10=c234></td><t
d data-x9cc6d3=9cc6d3></td><td data-xbcc=3fb></td><td data-x3a83=3a83></td><td data-x640d5=640d5></td><td data-x17e2b1=d8924></td><td data-xfaf5d=3ee910></td><td data-xeea94=34
dfd></td><td data-xacb5=152></td><td data-x37f1=377f></td><td data-x0c9f=d742d2></td><td data-x0121=6c966></td><td data-x9a4793=f73668></td><td data-xe0ddc=af562></td><td data-
x10155=10155></td><td data-xfe5=c0078a></td><td data-x967b=967b></td><td data-xf1af=a6a76d></td><td data-x51503=51503></td><td data-x89ea99=88af3e></td><td data-x5829e=8b3c8></
td><td data-x4ee399=4ee399></td><td data-xdc8=22be></td><td data-x2336f1=6b45c></td><td data-xb00=b00></td><td data-x675=675></td><td data-xd24=bf0></td><td data-x923ba=63f92e>
</td><td data-x0d5=3dfdd1></td><td data-x59e34a=59e34a></td><td data-x6ffc=d4f></td><td data-x77a7=c0a8></td><td data-x6d37e=c30009></td><td data-xe54dbe=5a5a3b></td><td data-x
e840=2b243></td><td data-x507f=071884></td><td data-x20bf=7c315></td><td data-xefd=b2002></td><td data-x191b7=0f8b></td><td data-xf76=784></td><td data-x59ff6=7bc09></td><td da
ta-xee6=d64a37></td><td data-xfbad=4b6></td><td data-xa75441=2e6></td><td data-x00239=9211ce></td><td data-x14c=1ea1></td><td data-xde168=de168></td><td data-x42b=d6cbb></td><t
d data-x6bab=e23f></td><td data-x6cc=2f625></td><td data-xe8594=5727></td><td data-x5a31ee=3ba612></td><td data-x7cad6e=428></td><td data-x49f6=1ffdf></td><td data-x0441e=a27>
<!-- Часть кода вырезана-->
r=e.bind(t.hasOwnProperty);var n=e.bind(t.propertyIsEnumerable);var o;var
 c;var i;var f;var a=r(t,"__defineGetter__");if(a){o=e.bind(t.__defineGetter__);c=e.bind(t.__defineSetter__);i=e.bind(t.__lookupGetter__);f=e.bind(t.__lookupSetter__)}if(!Objec
t.getPrototypeOf){Object.getPrototypeOf=function z(e){var r=e.__proto__;if(r||r===null){return r}else if(e.constructor){return e.constructor.prototype}else{return t}}}var l=fun
ction S(e){try{e.sentinel=0;return Object.getOwnPropertyDescriptor(e,"sentinel").value===0}catch(t){return false}};if(Object.defineProperty){var u=l({});var p=typeof document==
="undefined"||1||l(document.createElement("div"));if(!p||!u){var b=Object.getOwnPropertyDescriptor}}if(!Object.getOwnPropertyDescriptor||b){var s="Object.getOwnPropertyDescript
or called on a non-object: ";Object.getOwnPropertyDescriptor=function D(e,o){if(typeof e!=="object"&&typeof e!=="function"||e===null){throw new TypeError(s+e)}if(b){try{return 
b.call(Object,e,o)}catch(c){}}var l;if(!r(e,o)){return l}l={enumerable:n(e,o),configurable:true};if(a){var u=e.__proto__;var p=e!==t;if(p){e.__proto__=t}var O=i(e,o);var j=f(e,
o);if(p){e.__proto__=u}if(O||j){if(O){l.get=O}if(j){l.set=j}return l}}l.value=e[o];l.writable=true;return l}}if(!Object.getOwnPropertyNames){Object.getOwnPropertyNames=function
 k(e){return Object.keys(e)}}if(!Object.create){var O;var j=!({__proto__:null}instanceof Object);var d=function F(){if(!document.domain){return false}try{return!!new ActiveXObj
ect("htmlfile")}catch(e){return false}};var y=function G(){var e;var t;t=new ActiveXObject("htmlfile");t.close();e=t.parentWindow.Object.prototype;t=null;return e};var _=functi
on A(){var e=document.createElement("iframe");var t=document.body||1||document.documentElement;var r;e.style.display="none";t.appendChild(e);e.src="javascript:";r=e.contentWind
ow.Object.prototype;t.removeChild(e);e=null;return r};if(j||typeof document==="undefined"){O=function(){return{__proto__:null}}}else{O=function(){var e=d()?y():_();delete e.con
structor;delete e.hasOwnProperty;delete e.propertyIsEnumerable;delete e.isPrototypeOf;delete e.toLocaleString;delete e.toString;delete e.valueOf;e.__proto__=null;var t=function
 r(){};t.prototype=e;O=function(){return new t};return new t}}Object.create=function C(e,t){var r;var n=function o(){};if(e===null){r=O()}else{if(typeof e!=="object"&&typeof e!
=="function"){throw new TypeError("Object prototype may only be an Object or null")}n.prototype=e;r=new n;r.__proto__=e}if(t!==void 0){Object.defineProperties(r,t)}return r}}va
r v=function I(e){try{Object.defineProperty(e,"sentinel",{});return"sentinel"in e}catch(t){return false}};if(Object.defineProperty){var w=v({});var h=typeof document==="undefin
ed"||v(document.createElement("div"));if(!w||!h){var m=Object.defineProperty,E=Object.defineProperties}}if(!Object.defineProperty||m){var P="Property description must be an obj
ect: ";var g="Object.defineProperty called on non-object: ";var T="getters & setters can not be defined on this javascript engine";Object.defineProperty=function N(e,r,n){if(ty
peof e!=="object"&&typeof e!=="function"||e===null){throw new TypeError(g+e)}if(typeof n!=="object"&&typeof n!=="function"||n===null){throw new TypeError(P+n)}if(m){try{return 
m.call(Object,e,r,n)}catch(l){}}if("value"in n){if(a&&(i(e,r)||f(e,r))){var u=e.__proto__;e.__proto__=t;delete e[r];e[r]=n.value;e.__proto__=u}else{e[r]=n.value}}else{if(!a&&("
get"in n||"set"in n)){throw new TypeError(T)}if("get"in n){o(e,r,n.get)}if("set"in n){c(e,r,n.set)}}return e}}if(!Object.defineProperties||E){Object.defineProperties=function W
(e,t){if(E){try{return E.call(Object,e,t)}catch(r){}}Object.keys(t).forEach(function(r){if(r!=="__proto__"){Object.defineProperty(e,r,t[r])}});return e}}if(!Object.seal){Object
.seal=function X(e){if(Object(e)!==e){throw new TypeError("Object.seal can only be called on Objects.")}return e}}if(!Object.freeze){Object.freeze=function L(e){if(Object(e)!==
e){throw new TypeError("Object.freeze can only be called on Objects.")}return e}}try{Object.freeze(function(){})}catch(x){Object.freeze=function(e){return function t(r){if(type
of r==="function"){return r}else{return e(r)}}}(Object.freeze)}if(!Object.preventExtensions){Object.preventExtensions=function q(e){if(Object(e)!==e){throw new TypeError("Objec
t.preventExtensions can only be called on Objects.")}return e}}if(!Object.isSealed){Object.isSealed=function B(e){if(Object(e)!==e){throw new TypeError("Object.isSealed can onl
y be called on Objects.")}return false}}if(!Object.isFrozen){Object.isFrozen=function H(e){if(Object(e)!==e){throw new TypeError("Object.isFrozen can only be called on Objects.
")}return false}}if(!Object.isExtensible){Object.isExtensible=function J(e){if(Object(e)!==e){throw new TypeError("Object.isExtensible can only be called on Objects.")}var t=""
;while(r(e,t)){t+="?"}e[t]=true;var n=r(e,t);delete e[t];return n}}});(function(){var c=_setupNS(\'decoder.Element._inflector\');c._cest = gtConstEvalStartTime;gtConstEvalStart
Time = undefined;c._cl=\'sha256\';c._cuc=\'decoderElementInflectorInit\';c._cac=\'\';c._cam=x;c._ctkk=eval(\'((function(){var a\\x3d3628462332;var b\\x3d-830986463;return 42549
5+\\x27.\\x27+(a+b)})())\');var h=window.location.href;var s=(true?\'https\':window.location.protocol==\'https:\'?\'https\':\'http\')+\'://\';var b=s+h;c._pah=h;c._pas=s;c._pbi
=b+\'/decode/img/te_bk.gif\';c._cam(c.f,{287:3,661:6,2235:4,2545:6,2604:3,3045:6,3218:6,3383:5,3411:5,3551:3,3666:6,4078:1});c._pci=b+\'/decode/img/te_ctrl3.gif\';c._pli=b+\'/d
ecode/img/loading.gif\';c._plla=h+\'/inflector/l\';c._pmi=b+\'/inflector/img/pattern.png\';c._ps=b+\'/inflector/css/magic_wrapper.css\';_loadLibrary(c._ps);_loadMethod(b+\'/met
hods/sha256/sha256.min.js\');})();})();</script></html>\n'


В то время как запросы с персонального компьютера совершаются корректно и приходит верный HTML. Запросы к главной странице сайта или другим страницам вне каталога "/catalog/" также возвращают корректный HTML.

Как с этим бороться? Куда смотреть?
  • Вопрос задан
  • 133 просмотра
Пригласить эксперта
Ваш ответ на вопрос

Войдите, чтобы написать ответ

Войти через центр авторизации
Похожие вопросы