Написал скрипт на Python, скачивающий из Google картинки по запросу "<имя языка> language logo".
import os
import sys
import time
from urllib import FancyURLopener
import urllib2
import urllib
import simplejson
# Start FancyURLopener with a defined version string.
class MyOpener(FancyURLopener):
    """FancyURLopener subclass that overrides the ``version`` attribute.

    ``version`` is the attribute urllib's opener classes use as their
    User-Agent, so downloads made through this opener present themselves
    as the Firefox build named below.
    """

    version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'


# Single shared opener instance used for the image downloads.
myopener = MyOpener()
def request(searchTerm, max_count):
    """Download up to ``max_count`` images found for ``searchTerm``.

    Queries the Google AJAX image-search endpoint for
    "<searchTerm> language logo", issuing up to 10 requests whose ``start``
    offset advances by 4 each time, and saves every hit as
    ``imgs/<sanitized term><n>.jpg`` through the module-level ``myopener``.

    Args:
        searchTerm: Name to search for. Surrounding whitespace (such as the
            newline left on lines read from a file) is ignored.
        max_count: Maximum number of images to download. Values below 1
            download nothing.

    Returns:
        None. Stops as soon as ``max_count`` images were saved or all 10
        requests have been processed.

    Raises:
        ValueError: If a response carries no ``responseData`` payload.
        Exception: Network, JSON and file errors raised by urllib2,
            simplejson or the opener propagate unchanged.
    """
    # Without this guard one image was downloaded before the limit was
    # ever compared against the counter.
    if max_count <= 0:
        return

    # A trailing newline would otherwise be URL-encoded into the middle of
    # the query ("<name>%0A language logo").
    term = searchTerm.strip()
    query = urllib.quote(term + ' language logo', '')

    # File-name stem: keep only alphanumerics and dots. Loop-invariant, so
    # it is computed once instead of once per hit.
    safe_term = ''.join([c for c in term if c.isalnum() or c == '.'])

    # The download target directory is not created by retrieve().
    if not os.path.isdir('imgs'):
        os.makedirs('imgs')

    count = 0
    for page in range(0, 10):
        # Notice that the start changes for each iteration in order to
        # request a new set of images for each loop.
        # NOTE(review): 'MyIP' looks like a placeholder for the caller's
        # real IP address - confirm before relying on it.
        url = ('https://ajax.googleapis.com/ajax/services/search/images?'
               + 'v=1.0&q=' + query
               + '&start=' + str(page * 4)
               + '&userip=MyIP')
        print(url)

        # Named ``req`` so the local no longer shadows this function.
        req = urllib2.Request(url, None, {'Referer': 'testing'})
        response = urllib2.urlopen(req)
        try:
            # Get results using JSON.
            results = simplejson.load(response)
        finally:
            response.close()

        # NOTE(review): responseData is presumably null when the service
        # rejects the query - verify against the API docs. Fail with a
        # readable message instead of a TypeError on ``None['results']``.
        data = results.get('responseData')
        if not data:
            raise ValueError('no responseData for %r: %s'
                             % (term, results.get('responseDetails')))

        # Iterate over each result and fetch its unescaped url.
        for hit in data['results']:
            count += 1
            image_url = hit['unescapedUrl']
            print(image_url)
            name = os.path.join('imgs', safe_term + str(count) + '.jpg')
            myopener.retrieve(image_url, name)
            if count >= max_count:
                return
def main():
    """Download one logo image for every language listed in ``list.txt``.

    Reads ``list.txt`` from the current working directory (one language
    name per line), calls ``request(language, 1)`` for each non-empty line
    and reports failures on stdout without aborting the run.
    """
    # Close the file deterministically and drop the trailing newline that
    # readlines() leaves on every entry.
    with open('list.txt', 'r') as list_file:
        languages_list = [line.strip() for line in list_file]

    for language in languages_list:
        # Blank lines would otherwise trigger a search for " language logo".
        if not language:
            continue
        try:
            request(language, 1)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit still stop the script.
            print("error with request" + language)
        # NOTE(review): the original indentation was lost; the pause is
        # assumed to follow every language, not only failed ones.
        time.sleep(1)
# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Сейчас он трудится над списком из 256 языков.
UPD:
Единственное, хоть как-то подходящее под описание Rexx language: