import csv

import requests
from bs4 import BeautifulSoup as bs

# Request headers and the VK community whose wall is scraped.
headers = {
    'accept': '*/*',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36',
}
base_url = 'https://vk.com/hearthstone'


def hs_parse(base_url, headers):
    """Fetch the community wall once and return one record per post.

    The selectors below are taken from the original script and assume VK's
    server-rendered wall markup; they may need adjusting if that markup changes.
    """
    stats = []
    session = requests.Session()
    response = session.get(base_url, headers=headers)
    if response.status_code != 200:
        print('Request failed with status', response.status_code)
        return stats

    soup = bs(response.content, 'lxml')

    def count(link):
        # Counters sit in a nested div; a missing counter is stored as ''.
        if link is None:
            return ''
        counter = link.find('div', attrs={'class': 'like_button_count'})
        return counter.get_text(strip=True) if counter else ''

    # Each post exposes one block of like/comment/share buttons, one text block
    # and one author link; walking them in parallel keeps the metrics of a
    # single post together in one record. Only the first page is parsed here,
    # so pagination would be needed to collect more posts.
    button_blocks = soup.find_all('div', attrs={'class': 'like_btns'})
    texts = soup.find_all('div', attrs={'class': 'wall_post_text'})
    authors = soup.find_all('div', attrs={'data-post-click-type': 'post_owner_link'})

    for btns, text, author in zip(button_blocks, texts, authors):
        comments = btns.find('a', attrs={'class': 'like_btn comment _comment _reply_wrap'})
        likes = btns.find('a', attrs={'class': 'like_btn like _like'})
        reposts = btns.find('a', attrs={'class': 'like_btn share _share'})
        views = btns.find('a', attrs={'class': 'like_views _views'})

        stats.append({
            'author': author.get_text(strip=True),
            'content': text.get_text(strip=True),
            'likes': count(likes),
            'comentators': count(comments),
            'reposts': count(reposts),
            'watching': views.get_text(strip=True) if views else '',
        })

    print(len(stats), 'posts parsed')
    print(stats)
    return stats


def files_writer(stats):
    # newline='' prevents csv from inserting blank rows on Windows.
    with open('parsed_hs_pub.csv', 'w', encoding='utf-8', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(('author', 'content', 'likes', 'comentators', 'reposts', 'watching'))
        for post in stats:
            writer.writerow((post['author'], post['content'], post['likes'],
                             post['comentators'], post['reposts'], post['watching']))
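

# Optional alternative sketch: csv.DictWriter writes the same records by key,
# which avoids spelling out the column order twice. This assumes each record in
# `stats` is a dict with exactly the keys listed in `fieldnames`; the function
# name `files_writer_dict` is illustrative, not part of the original script.
def files_writer_dict(stats):
    fieldnames = ['author', 'content', 'likes', 'comentators', 'reposts', 'watching']
    with open('parsed_hs_pub.csv', 'w', encoding='utf-8', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(stats)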


if __name__ == '__main__':
    stats = hs_parse(base_url, headers)
    files_writer(stats)