requestsを見てみる
そもそも
requestsはurllib3をもとに作られている。これはrequests.__init__.pyを見ればわかる。また、HTTPリクエストを送るためのライブラリはpythonには複数ある。
urllib2, urllib(.request), urllib3, requests。これらについて理解するのも後々役に立つだろう。
- urllib2...python2で使われていたもの。python3では使えない。
- urllib(.request)...python3でurllib2の機能を再編成し、urllib.requestやurllib.errorなどのモジュールに分割したもの。
- urllib3...上記とは違い非標準ライブラリ、つまり3rdパーティ製ライブラリ。
- requests... urllib3をもとにさらに使いやすくしたもの。
requestsの中身はどうなっているのか
一つのライブラリを使えるようになるだけでなく、どのように動いているかも調べてみる。
ソース
https://github.com/kennethreitz/requests/tree/master/requests
クラスの整理
requests/models.py
https://github.com/kennethreitz/requests/blob/master/requests/models.py
requests/models.pyでクラスが定義されている。
class RequestEncodingMixin(object) class RequestHooksMixin(object) class Request(RequestHooksMixin) class PreparedRequest(RequestEncodingMixin, RequestHooksMixin) class Response(object)
requests/sessions.py
class SessionRedirectMixin(object) class Session(SessionRedirectMixin)
requests/adapters.py
class BaseAdapter(object) class HTTPAdapter(BaseAdapter)
requests/cookies.py
class MockRequest(object) class MockResponse(object) class CookieConflictError(RuntimeError) class RequestsCookieJar(cookielib.CookieJar, MutableMapping)
requests.get(URL)に限って言えば、各クラスは以下の関係がある。
再考中。
コードの流れの整理
import requests requests.get(URL)
1. [ import requests ] =======>> [ requests.__init__.pyにてapi.get関数をインポート ] 2. [ requests.get(URL) ] ============ [ get関数を実行 ] 3. [ get関数を実行 ] ===============>> 戻り値[ api.request関数 (の実行) ] 4. [ api.request関数 (の実行) ] =====>> 戻り値[ Sessionインスタンス生成+requestメソッド実行 ] 5. [ Sessionインスタンス生成+requestメソッド実行] ======>> 戻り値[ Responseインスタンス作成 ]
1. [ import requests ] =========>> [ requests.__init__.pyにてapi.get関数をインポート ]
https://github.com/kennethreitz/requests/blob/master/requests/__init__.py#L115
from .api import request, get, head, post, patch, put, delete, options
2. [ requests.get(URL) ] ============ [ get関数を実行 ]
3.[ get関数を実行 ] =========>> 戻り値[ api.request関数 (の実行) ]
def get(url, params=None, **kwargs): kwargs.setdefault('allow_redirects', True) return request('get', url, params=params, **kwargs)
4.[ api.request関数 (の実行) ] ====>> 戻り値[Sessionインスタンス生成+requestメソッド実行]
def request(method, url, **kwargs): with sessions.Session() as session: return session.request(method=method, url=url, **kwargs)
5.[ Sessionインスタンス生成+requestメソッド実行] ======>> 戻り値[ Responseインスタンス作成 ]
(( requestメソッド実行 )) def request(self, method, url, params=None, data=None, headers=None, cookies=None, files=None, auth=None, timeout=None, allow_redirects=True, proxies=None, hooks=None, stream=None, verify=None, cert=None, json=None): # Create the Request. req = Request( method=method.upper(), url=url, headers=headers, files=files, data=data or {}, json=json, params=params or {}, auth=auth, cookies=cookies, hooks=hooks, ) prep = self.prepare_request(req) proxies = proxies or {} settings = self.merge_environment_settings( prep.url, proxies, stream, verify, cert ) # Send the request. send_kwargs = { 'timeout': timeout, 'allow_redirects': allow_redirects, } send_kwargs.update(settings) resp = self.send(prep, **send_kwargs) return resp
Constructs a :class:`Request <Request>`, prepares it and sends it. Returns :class:`Response <Response>` object.
まず最初にRequestインスタンスを作成し、RequestインスタンスからPreparedRequestインスタンスに変換し、それを送信する。その結果Responseインスタンスを得る。
Responseインスタンスを得るのはSession.send()メソッドの実行によってであり、このsend()メソッドはさらに複数のメソッドや関数の呼び出しから構成されている。
requestメソッドに含まれるメソッド、関数を軽く説明
- Session.prepare_request()はRequestインスタンス(req)を引数にPreparedRequestインスタンスを生成する。
https://github.com/kennethreitz/requests/blob/master/requests/sessions.py#L426
- Session.merge_environment_settings()
- DICT.update()
- Session.send()
Session.send()
Session.send()
def send(self, request, **kwargs): """Send a given PreparedRequest. :rtype: requests.Response """ # Set defaults that the hooks can utilize to ensure they always have # the correct parameters to reproduce the previous request. kwargs.setdefault('stream', self.stream) kwargs.setdefault('verify', self.verify) kwargs.setdefault('cert', self.cert) kwargs.setdefault('proxies', self.proxies) # It's possible that users might accidentally send a Request object. # Guard against that specific failure case. if isinstance(request, Request): raise ValueError('You can only send PreparedRequests.') # Set up variables needed for resolve_redirects and dispatching of hooks allow_redirects = kwargs.pop('allow_redirects', True) stream = kwargs.get('stream') hooks = request.hooks # Get the appropriate adapter to use adapter = self.get_adapter(url=request.url) # Start time (approximately) of the request start = preferred_clock() # Send the request r = adapter.send(request, **kwargs) # Total elapsed time of the request (approximately) elapsed = preferred_clock() - start r.elapsed = timedelta(seconds=elapsed) # Response manipulation hooks r = dispatch_hook('response', hooks, r, **kwargs) # Persist cookies if r.history: # If the hooks create history then we want those cookies too for resp in r.history: extract_cookies_to_jar(self.cookies, resp.request, resp.raw) extract_cookies_to_jar(self.cookies, request, r.raw) # Redirect resolving generator. gen = self.resolve_redirects(r, request, **kwargs) # Resolve redirects if allowed. history = [resp for resp in gen] if allow_redirects else [] # Shuffle things around if there's history. if history: # Insert the first (original) request at the start history.insert(0, r) # Get the last request made r = history.pop() r.history = history # If redirects aren't being followed, store the response on the Request for Response.next(). 
if not allow_redirects: try: r._next = next(self.resolve_redirects(r, request, yield_requests=True, **kwargs)) except StopIteration: pass if not stream: r.content return r
# Get the appropriate adapter to use adapter = self.get_adapter(url=request.url)の部分では、URLのprefixがhttps://なのかhttp://なのかに応じて、使用するアダプターを一つに絞っている。
HTTPAdapter.send()メソッド(Session.send()の中でadapter.send()として呼び出される)
def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None): """Sends PreparedRequest object. Returns Response object. :param request: The :class:`PreparedRequest` being sent. :param stream: (optional) Whether to stream the request content. :param timeout: (optional) How long to wait for the server to send data before giving up, as a float, or a :ref:`(connect timeout, read timeout) ` tuple. :type timeout: float or tuple or urllib3 Timeout object :param verify: (optional) Either a boolean, in which case it controls whether we verify the server's TLS certificate, or a string, in which case it must be a path to a CA bundle to use :param cert: (optional) Any user-provided SSL certificate to be trusted. :param proxies: (optional) The proxies dictionary to apply to the request. :rtype: requests.Response """ try: conn = self.get_connection(request.url, proxies) except LocationValueError as e: raise InvalidURL(e, request=request) self.cert_verify(conn, request.url, verify, cert) url = self.request_url(request, proxies) self.add_headers(request, stream=stream, timeout=timeout, verify=verify, cert=cert, proxies=proxies) chunked = not (request.body is None or 'Content-Length' in request.headers) if isinstance(timeout, tuple): try: connect, read = timeout timeout = TimeoutSauce(connect=connect, read=read) except ValueError as e: # this may raise a string formatting error. err = ("Invalid timeout {}. Pass a (connect, read) " "timeout tuple, or a single float to set " "both timeouts to the same value".format(timeout)) raise ValueError(err) elif isinstance(timeout, TimeoutSauce): pass else: timeout = TimeoutSauce(connect=timeout, read=timeout) try: if not chunked: resp = conn.urlopen( method=request.method, url=url, body=request.body, headers=request.headers, redirect=False, assert_same_host=False, preload_content=False, decode_content=False, retries=self.max_retries, timeout=timeout ) # Send the request. 
else: if hasattr(conn, 'proxy_pool'): conn = conn.proxy_pool low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT) try: low_conn.putrequest(request.method, url, skip_accept_encoding=True) for header, value in request.headers.items(): low_conn.putheader(header, value) low_conn.endheaders() for i in request.body: low_conn.send(hex(len(i))[2:].encode('utf-8')) low_conn.send(b'\r\n') low_conn.send(i) low_conn.send(b'\r\n') low_conn.send(b'0\r\n\r\n') # Receive the response from the server try: # For Python 2.7, use buffering of HTTP responses r = low_conn.getresponse(buffering=True) except TypeError: # For compatibility with Python 3.3+ r = low_conn.getresponse() resp = HTTPResponse.from_httplib( r, pool=conn, connection=low_conn, preload_content=False, decode_content=False ) except: # If we hit any problems here, clean up the connection. # Then, reraise so that we can handle the actual exception. low_conn.close() raise except (ProtocolError, socket.error) as err: raise ConnectionError(err, request=request) except MaxRetryError as e: if isinstance(e.reason, ConnectTimeoutError): # TODO: Remove this in 3.0.0: see #2811 if not isinstance(e.reason, NewConnectionError): raise ConnectTimeout(e, request=request) if isinstance(e.reason, ResponseError): raise RetryError(e, request=request) if isinstance(e.reason, _ProxyError): raise ProxyError(e, request=request) if isinstance(e.reason, _SSLError): # This branch is for urllib3 v1.22 and later. raise SSLError(e, request=request) raise ConnectionError(e, request=request) except ClosedPoolError as e: raise ConnectionError(e, request=request) except _ProxyError as e: raise ProxyError(e) except (_SSLError, _HTTPError) as e: if isinstance(e, _SSLError): # This branch is for urllib3 versions earlier than v1.22 raise SSLError(e, request=request) elif isinstance(e, ReadTimeoutError): raise ReadTimeout(e, request=request) else: raise return self.build_response(request, resp)