# -*- coding: utf-8 -*-
"""Download pages of https://aiacademy.tw/ into files next to this script.

The list of paths comes from two API endpoints that return backtick-separated
text: one with page paths and one with category names (each category is
fetched as ``/category/<name>``).  The site root ``/`` is always included.

Every page is requested with the ``?nc`` query string and, on HTTP 200, its
body is written to a file in the script's directory.  The file is named after
the path with every ``/`` replaced by ``---``; the site root is stored as
``index``.  When the run finishes, the current local time is written to
``__LASTEST_RUN_TIME__.txt``.

The code runs on both Python 2 and Python 3.
"""
import datetime
import os

import requests

# NOTE(review): the API keys are embedded in these URLs; consider loading them
# from configuration or the environment instead of keeping them in source.
URL_LIST_API = 'https://aiacademy.tw/api/get_urls_for_cache.php?key=sdifowexckvxwe924234'
CATEGORY_LIST_API = 'https://aiacademy.tw/api/get_all_categories.php?key=ksaldfjeriotzkcvj'
SITE_ROOT = 'https://aiacademy.tw/'

# Seconds to wait for any single HTTP request; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 30


def _split_entries(text):
    """Return the stripped, non-empty entries of a backtick-separated list.

    A response without any backtick is treated as containing no entries,
    so a body that is not a list at all contributes nothing.
    """
    if '`' not in text:
        return []
    stripped = (part.strip() for part in text.split('`'))
    return [part for part in stripped if part]


def _normalize(path):
    """Return ``(relative_url, filename)`` for a site path.

    One leading and one trailing ``/`` are removed.  An empty result (the
    site root) maps to the file name ``index``; otherwise the file name is
    the relative URL with every ``/`` replaced by ``---``.
    """
    url = path
    if url.startswith('/'):
        url = url[1:]
    # endswith() is safe on an empty string, unlike indexing with [-1].
    if url.endswith('/'):
        url = url[:-1]
    if not url:
        return '', 'index'
    return url, url.replace('/', '---')


# Relative URLs already handled in this run, used to skip duplicates.
ary_cached_url = []

ary_urls = ['/']
res = requests.get(URL_LIST_API, timeout=REQUEST_TIMEOUT)
ary_urls += _split_entries(res.text)

res = requests.get(CATEGORY_LIST_API, timeout=REQUEST_TIMEOUT)
ary_urls += ['/category/' + v for v in _split_entries(res.text)]

ABS_PATH = os.path.dirname(os.path.realpath(__file__)) + '/'

for v in ary_urls:
    url, filename = _normalize(v)

    # Absolute URLs are skipped, as is anything already processed.
    if v.startswith('http') or url in ary_cached_url:
        continue

    # Prints "<v> <url>" identically on Python 2 and Python 3.
    print('%s %s' % (v, url))
    ary_cached_url.append(url)

    URL = SITE_ROOT + url + '?nc'  # "?nc" means no_vcache
    try:
        res = requests.get(URL, timeout=REQUEST_TIMEOUT)
    except requests.exceptions.RequestException as exc:
        # Treat a failed request like a non-200 response: skip this page
        # and keep going with the remaining ones.
        print('%s failed: %s' % (URL, exc))
        continue
    if res.status_code != 200:
        continue

    # Write the raw response bytes; this avoids a decode/encode round trip
    # that fails when the response declares no encoding.
    with open(ABS_PATH + filename, 'wb') as f:
        f.write(res.content)

with open(ABS_PATH + '__LASTEST_RUN_TIME__.txt', 'w') as f:
    f.write(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))