import datetime, re, cPickle import urllib2 import gzip from StringIO import StringIO datafile = file('data.pickle', 'rb') try: data = cPickle.load(datafile) except EOFError: data = {} datafile.close() first = datetime.date(2006,11,18) last = datetime.date.today() last = last.replace(day = last.day - 1) #Today's aren't guaranteed to exist. Well, none are guaranteed. But. arraycatcher = re.compile(r'^(.+)Array\[arrId\]=(\[.*\]);$') #http://www.worldofwarcraft.com/toplist/data/en/category_ddmmyyyy.js dataurl = 'http://www.worldofwarcraft.com/toplist/data/en/%(category)s_%(date)s.js' categories = ('killedby', 'spellcreate', 'auctionsold', 'gameobjloot', 'unitloot', 'questcomplete') dates = [] for delta in [datetime.timedelta(t) for t in range((last - first).days + 1)]: d = first + delta dates.append(d.strftime('%d%m%Y')) for category in categories: if not category in data: data[category] = {} for date in dates: if not date in data[category]: print "Fetching %s for %s" % (category, date) request = _fetch(dataurl % {'date': date, 'category': category}) js = request['data'].splitlines() holder = {} for line in js: match = arraycatcher.search(line) if match: name, array = match.groups() name = name.replace(category, '') holder[name] = eval(array) if holder.has_key('Name') and holder.has_key('Number') and holder.has_key('Id'): data[category][date] = holder else: print "Data for %s %s not formatted as expected." % (category, date) else: print "Skipping %s for %s, already saved" % (category, date) def _fetch(url): request = urllib2.Request(url) request.add_header('Accept-encoding', 'gzip') request.add_header('User-agent', USER_AGENT) f = urllib2.urlopen(request) data = StringIO(f.read()) f.close() if f.headers.get('content-encoding', '') == 'gzip': data = gzip.GzipFile(fileobj=data) return data datafile = file('data.pickle', 'wb') cPickle.dump(data, datafile, cPickle.HIGHEST_PROTOCOL) datafile.close()