import requests
# Artist profile page and the first page of that artist's JSON project listing.
url = 'https://www.artstation.com/users/kuvshinov_ilya'
json_url = 'https://www.artstation.com/users/kuvshinov_ilya/projects.json?page=1'

# Only the User-Agent header is supplied explicitly with each request.
header = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:32.0) Gecko/20100101 Firefox/32.0',
}

session = requests.Session()

# Request the profile page first on the same session, then the JSON listing.
r = session.get(url, headers=header)
json_r = session.get(json_url, headers=header)

# Printing the response object shows its status line, not the body.
print(json_r)
# Output: <Response [403]>
def get_session():
    """Create a cfscrape scraper that sends Firefox-like request headers.

    A plain ``requests.Session`` is created, its ``headers`` attribute is
    replaced by the mapping below, and the session is passed to
    ``cfscrape.create_scraper`` through ``sess=``.

    Returns:
        Whatever ``cfscrape.create_scraper`` returns for that session.
    """
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'ru,en-US;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }

    base_session = requests.Session()
    # Assignment (not update): the session's default header mapping is replaced.
    base_session.headers = browser_headers
    return cfscrape.create_scraper(sess=base_session)
session = get_session() # From here on, use it just like a regular requests.Session
import requests
import cfscrape
def get_session():
    """Create a cfscrape scraper preconfigured for www.artstation.com.

    A plain ``requests.Session`` is created, its ``headers`` attribute is
    replaced by the mapping below (Firefox-like values plus a fixed ``Host``),
    and the session is passed to ``cfscrape.create_scraper`` through ``sess=``.

    Returns:
        Whatever ``cfscrape.create_scraper`` returns for that session.
    """
    # NOTE(review): the fixed 'Host' value is part of the session headers, so
    # it applies to every request made through this session - confirm that no
    # caller uses the session for a different hostname.
    browser_headers = {
        'Host': 'www.artstation.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'ru,en-US;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }

    base_session = requests.Session()
    # Assignment (not update): the session's default header mapping is replaced.
    base_session.headers = browser_headers
    return cfscrape.create_scraper(sess=base_session)
def artstation(username='kyuyongeom', output_path='links.txt'):
    """Collect the image URLs of every project of one ArtStation user.

    The paginated project listing of *username* is walked page by page; for
    each listed project the per-project JSON is fetched and the ``image_url``
    of every asset whose ``asset_type`` is ``'image'`` is collected. The URLs
    are then written to *output_path*, one per line.

    Args:
        username: ArtStation user name used to build the listing URL.
        output_path: File that receives the collected links (overwritten).

    Raises:
        requests.HTTPError: If any request returns an error status
            (for example 403 when the site rejects the client).
        KeyError: If a JSON payload lacks one of the expected fields.
    """
    page_url = 'https://www.artstation.com/users/{}/projects.json'.format(username)
    post_pattern = 'https://www.artstation.com/projects/{}.json'
    # Page size the listing is assumed to use; needed to derive the page count
    # from 'total_count'.
    per_page = 50

    session = get_session()

    first_page = _fetch_json(session, page_url, params={'page': 1})
    # Ceiling division: a partially filled last page still counts as a page.
    pages = -(-first_page['total_count'] // per_page)

    absolute_links = []
    for page in range(1, pages + 1):
        # Page 1 was already downloaded to learn the total count; reuse it.
        if page == 1:
            listing = first_page
        else:
            listing = _fetch_json(session, page_url, params={'page': page})

        for post in listing['data']:
            # The last path segment of the permalink identifies the project.
            shortcode = post['permalink'].split('/')[-1]
            project = _fetch_json(session, post_pattern.format(shortcode))
            absolute_links.extend(
                asset['image_url']
                for asset in project['assets']
                if asset['asset_type'] == 'image'
            )

    with open(output_path, 'w', encoding='utf-8') as file:
        file.write('\n'.join(absolute_links))


def _fetch_json(session, url, **kwargs):
    """GET *url* through *session* and return the decoded JSON body.

    Raises the HTTP error for a 4xx/5xx response instead of letting a
    non-JSON error page fail later inside ``.json()``.
    """
    response = session.get(url, **kwargs)
    response.raise_for_status()
    return response.json()
# Run the scraper only when this file is executed directly, not when imported.
if __name__ == "__main__":
    artstation()
import requests
# Artist profile page and the first page of that artist's JSON project listing.
url = 'https://www.artstation.com/users/kuvshinov_ilya'
json_url = 'https://www.artstation.com/users/kuvshinov_ilya/projects.json?page=1'

# Browser-like header set used for every request made through the session.
# NOTE(review): 'br' in accept-encoding advertises Brotli support - confirm a
# Brotli decoder is installed, otherwise such responses may not decode.
header = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7',
    'cache-control': 'no-cache',
    'dnt': '1',
    'pragma': 'no-cache',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'none',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
}

session = requests.Session()
# Assignment (not update): the session's default header mapping is replaced.
session.headers = header

# The JSON listing is requested only after the profile page answered 200.
r = session.get(url)
if r.status_code == 200:
    json_r = session.get(json_url)
    # Print the body on success, otherwise only the HTTP status code.
    print(json_r.text if json_r.status_code == 200 else json_r.status_code)