先用同步方式把同一個請求依次執行 100 遍:
import logging
import time

import requests

# Timestamped INFO-level logging so each request shows up with its time.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s: %(message)s')

TOTAL_NUMBER = 100
BASE_URL = 'https://ssr4.scrape.center/'

# Synchronous baseline: issue TOTAL_NUMBER requests one after another and
# measure the wall-clock time. BASE_URL has no "{id}" placeholder, so the
# same page is fetched on every iteration and no formatting is needed.
start_time = time.time()
for _ in range(TOTAL_NUMBER):
    logging.info('scraping %s', BASE_URL)
    # Blocks until the response arrives; the next request cannot start earlier.
    requests.get(BASE_URL)
end_time = time.time()
logging.info('total time: %s seconds', end_time - start_time)
耗時較長
同步指不同程序單元為了完成某個任務在執行過程中需靠某種通信方式以協調一致,稱這些程序單元是同步執行的
同步意味著有序
異步 不同程序單元之間無需通信協調,也能完成任務的方式,不相關的程序單元之間可以是異步的
異步意味著無序
協程
coroutine
在 Python 中指代協程對象類型。可以將協程對象注冊到事件循環中,它會被事件循環調用
可以使用async關鍵字來定義一個方法
這個方法在調用時不會立即被執行,而是返回一個協程對象
實例引用:
import asyncio


async def execute(X):
    """Print the given number.

    Because this is an ``async def`` function, calling it does not run the
    body; it returns a coroutine object that an event loop must drive.
    """
    print('Number:', X)


# Nothing is printed by execute() yet: this only creates the coroutine object.
coroutine = execute(1)
print('Coroutine:', coroutine)
print('After calling execute')

# Create the loop explicitly: asyncio.get_event_loop() is deprecated when no
# loop is running and fails on the newest Python versions.
loop = asyncio.new_event_loop()
try:
    # The coroutine body actually runs here.
    loop.run_until_complete(coroutine)
finally:
    loop.close()
print('After calling loop')
運行結果如下:
Coroutine: <coroutine object execute at 0x...>
After calling execute
Number: 1
After calling loop
import asyncio


async def execute(X):
    """Print the given number and return it as the coroutine's result."""
    print('Number:', X)
    return X


# Calling the async function only builds a coroutine object; nothing runs yet.
coroutine = execute(1)
print('Coroutine:', coroutine)
print('After calling execute')

# Create the loop explicitly: asyncio.get_event_loop() is deprecated when no
# loop is running and fails on the newest Python versions.
loop = asyncio.new_event_loop()
try:
    # Wrapping the coroutine in a Task lets us inspect its state and result.
    task = loop.create_task(coroutine)
    print('Task:', task)  # still pending at this point
    loop.run_until_complete(task)
    print('Task:', task)  # now finished, with result=1
finally:
    loop.close()
print('After calling loop')
?
import asyncio

import requests


async def request():
    """Fetch the Baidu home page and return the ``requests`` response object.

    NOTE: ``requests.get`` is a blocking call, so the event loop is stalled
    while it waits for the network.
    """
    url = 'https://www.baidu.com'
    status = requests.get(url)
    return status


def callback(task):
    """Done-callback: print the result of the finished task."""
    print('Status:', task.result())


# ensure_future() needs a loop to attach the task to. Passing one explicitly
# avoids the deprecated "implicitly create the current loop" behaviour.
loop = asyncio.new_event_loop()
coroutine = request()
task = asyncio.ensure_future(coroutine, loop=loop)
# The callback fires once the task completes, before run_until_complete returns.
task.add_done_callback(callback)
print('Task:', task)
try:
    loop.run_until_complete(task)
finally:
    loop.close()
print('Task:', task)
一次請求
多任務協程
import asyncio

import requests


async def request():
    """Fetch the Baidu home page and return the ``requests`` response object.

    NOTE: ``requests.get`` is a blocking call, so the event loop is stalled
    while it waits for the network.
    """
    url = 'https://www.baidu.com'
    status = requests.get(url)
    return status


# Tasks must be bound to a loop. Create it first and pass it explicitly rather
# than relying on the deprecated implicit current loop.
loop = asyncio.new_event_loop()
tasks = [asyncio.ensure_future(request(), loop=loop) for _ in range(5)]
print('Task:', tasks)
try:
    # asyncio.wait() completes once every task in the list has finished.
    loop.run_until_complete(asyncio.wait(tasks))
finally:
    loop.close()

for task in tasks:
    print('Status:', task.result())
?
import asyncio
import time

import requests

start = time.time()


async def request():
    """Fetch the demo page once, printing a line before and after the request.

    NOTE: ``requests.get`` is a blocking call, so each task holds the event
    loop until its response arrives; the tasks do not overlap.
    """
    url = 'https://ssr4.scrape.center/'
    print('Waiting for', url)
    response = requests.get(url)
    print('Get response from', url, 'response', response)


# Tasks must be bound to a loop. Create it first and pass it explicitly rather
# than relying on the deprecated implicit current loop.
loop = asyncio.new_event_loop()
tasks = [asyncio.ensure_future(request(), loop=loop) for _ in range(10)]
try:
    loop.run_until_complete(asyncio.wait(tasks))
finally:
    loop.close()

end = time.time()
print('Cost time:', end - start)
相比於第一個程序耗時更少(但 requests.get 是阻塞調用,這 10 個請求實際上仍是依次執行的,並沒有真正並發)
使用 aiohttp 模塊,通過 await 將等待響應的協程掛起
import asyncio
import time

import aiohttp

start = time.time()


async def get(url):
    """Fetch ``url`` with aiohttp and return the response after reading its body.

    The session is opened with ``async with`` so it is closed even if the
    request or the body read raises.
    """
    async with aiohttp.ClientSession() as session:
        response = await session.get(url)
        # Read the body before the session closes.
        await response.text()
        return response


async def request():
    """Fetch the demo page once, printing a line before and after the request."""
    url = 'https://ssr4.scrape.center/'
    print('Waiting for', url)
    # Awaiting suspends this coroutine so the loop can run the other tasks.
    response = await get(url)
    print('Get response from', url, 'response', response)


# Tasks must be bound to a loop. Create it first and pass it explicitly rather
# than relying on the deprecated implicit current loop.
loop = asyncio.new_event_loop()
tasks = [asyncio.ensure_future(request(), loop=loop) for _ in range(10)]
try:
    loop.run_until_complete(asyncio.wait(tasks))
finally:
    loop.close()

end = time.time()
print('Cost time:', end - start)