urllib3についてメモ
document
https://urllib3.readthedocs.io/en/latest/index.html
コードの流れを追う
import urllib3 #1 http = urllib3.PoolManager() #2 r = http.request('GET', 'http://httpbin.org/robots.txt') #3
#1
import urllib3
https://github.com/urllib3/urllib3/blob/master/src/urllib3/__init__.py#L11
urllib3をインポートしたときにディレクトリ内の__init__.pyでPoolManagerもインポートされる。
#2
http = urllib3.PoolManager()
urllib3.PoolManagerのインスタンスhttpを生成する。これでrequestメソッドを呼び出す準備が整う。
#3
r = http.request('GET', 'http://httpbin.org/robots.txt')
PoolManager.request()を実行しているが、正確にはPoolManagerの親クラスであるRequestMethodsのrequest()メソッドを実行している。
https://github.com/urllib3/urllib3/blob/master/src/urllib3/request.py#L10
urllib3.request.RequestMethods(object).request()
def request(self, method, url, fields=None, headers=None, **urlopen_kw): """ Make a request using :meth:`urlopen` with the appropriate encoding of ``fields`` based on the ``method`` used. This is a convenience method that requires the least amount of manual effort. It can be used in most situations, while still having the option to drop down to more specific methods when necessary, such as :meth:`request_encode_url`, :meth:`request_encode_body`, or even the lowest level :meth:`urlopen`. """ method = method.upper() urlopen_kw["request_url"] = url if method in self._encode_url_methods: return self.request_encode_url( method, url, fields=fields, headers=headers, **urlopen_kw ) else: return self.request_encode_body( method, url, fields=fields, headers=headers, **urlopen_kw )
request()メソッドでは、methodが_encode_url_methodsに含まれているものならばurllib3.request.RequestMethods(object).request_encode_url()メソッドを実行する。具体的に言えばリクエストメソッドが"DELETE", "GET", "HEAD", "OPTIONS"のどれかであれば、RequestMethods(object).request_encode_url()メソッドを実行する。一方で_encode_url_methodsにmethodがない場合は、urllib3.request.RequestMethods(object).request_encode_body()メソッドを実行する。
_encode_url_methodsはセット型データでRequestMethods(object)の属性である。_encode_url_methods = {"DELETE", "GET", "HEAD", "OPTIONS"}
https://github.com/urllib3/urllib3/blob/master/src/urllib3/request.py#L39
urllib3.request.RequestMethods(object).request_encode_url()
def request_encode_url(self, method, url, fields=None, headers=None, **urlopen_kw): """ Make a request using :meth:`urlopen` with the ``fields`` encoded in the url. This is useful for request methods like GET, HEAD, DELETE, etc. """ if headers is None: headers = self.headers extra_kw = {"headers": headers} extra_kw.update(urlopen_kw) if fields: url += "?" + urlencode(fields) return self.urlopen(method, url, **extra_kw)
urllib3.poolmanager.PoolManager(RequestMethods).urlopen()
def urlopen(self, method, url, redirect=True, **kw): """ Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` with custom cross-host redirect logic and only sends the request-uri portion of the ``url``. The given ``url`` parameter must be absolute, such that an appropriate :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. """ u = parse_url(url) conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) kw["assert_same_host"] = False kw["redirect"] = False if "headers" not in kw: kw["headers"] = self.headers.copy() if self.proxy is not None and u.scheme == "http": response = conn.urlopen(method, url, **kw) else: response = conn.urlopen(method, u.request_uri, **kw) redirect_location = redirect and response.get_redirect_location() if not redirect_location: return response # Support relative URLs for redirecting. redirect_location = urljoin(url, redirect_location) # RFC 7231, Section 6.4.4 if response.status == 303: method = "GET" retries = kw.get("retries") if not isinstance(retries, Retry): retries = Retry.from_int(retries, redirect=redirect) # Strip headers marked as unsafe to forward to the redirected location. # Check remove_headers_on_redirect to avoid a potential network call within # conn.is_same_host() which may use socket.gethostbyname() in the future. if retries.remove_headers_on_redirect and not conn.is_same_host( redirect_location ): headers = list(six.iterkeys(kw["headers"])) for header in headers: if header.lower() in retries.remove_headers_on_redirect: kw["headers"].pop(header, None) try: retries = retries.increment(method, url, response=response, _pool=conn) except MaxRetryError: if retries.raise_on_redirect: raise return response kw["retries"] = retries kw["redirect"] = redirect log.info("Redirecting %s -> %s", url, redirect_location) return self.urlopen(method, redirect_location, **kw)
conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)のconnはHTTP(s)ConnectionPoolインスタンスである。
HTTP(s)ConnectionPool.urlopen()を実行するとresponseが得られる。このconnオブジェクトは、以下の複数のメソッドが順に呼び出された結果、最終的にreturn poolで返される値である。
https://github.com/urllib3/urllib3/blob/master/src/urllib3/poolmanager.py#L213
def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None): """ Get a :class:`ConnectionPool` based on the host, port, and scheme. If ``port`` isn't given, it will be derived from the ``scheme`` using ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is provided, it is merged with the instance's ``connection_pool_kw`` variable and used to create the new connection pool, if one is needed. """ if not host: raise LocationValueError("No host specified.") request_context = self._merge_pool_kwargs(pool_kwargs) request_context["scheme"] = scheme or "http" if not port: port = port_by_scheme.get(request_context["scheme"].lower(), 80) request_context["port"] = port request_context["host"] = host return self.connection_from_context(request_context)
https://github.com/urllib3/urllib3/blob/master/src/urllib3/poolmanager.py#L236
def connection_from_context(self, request_context): """ Get a :class:`ConnectionPool` based on the request context. ``request_context`` must at least contain the ``scheme`` key and its value must be a key in ``key_fn_by_scheme`` instance variable. """ scheme = request_context["scheme"].lower() pool_key_constructor = self.key_fn_by_scheme[scheme] pool_key = pool_key_constructor(request_context) return self.connection_from_pool_key(pool_key, request_context=request_context)
https://github.com/urllib3/urllib3/blob/master/src/urllib3/poolmanager.py#L249
def connection_from_pool_key(self, pool_key, request_context=None): """ Get a :class:`ConnectionPool` based on the provided pool key. ``pool_key`` should be a namedtuple that only contains immutable objects. At a minimum it must have the ``scheme``, ``host``, and ``port`` fields. """ with self.pools.lock: # If the scheme, host, or port doesn't match existing open # connections, open a new ConnectionPool. pool = self.pools.get(pool_key) if pool: return pool # Make a fresh ConnectionPool of the desired type scheme = request_context["scheme"] host = request_context["host"] port = request_context["port"] pool = self._new_pool(scheme, host, port, request_context=request_context) self.pools[pool_key] = pool return pool
戻り値はpoolであり、これはHTTPConnectionPoolまたはHTTPSConnectionPoolクラスのインスタンスである。poolを新しく生成する場合は、# Make a fresh ConnectionPool of the desired type というコメント以下の部分で生成される。より具体的には_new_poolメソッドが生成を担う。
https://github.com/urllib3/urllib3/blob/master/src/urllib3/poolmanager.py#L177
def _new_pool(self, scheme, host, port, request_context=None): """ Create a new :class:`ConnectionPool` based on host, port, scheme, and any additional pool keyword arguments. If ``request_context`` is provided, it is provided as keyword arguments to the pool class used. This method is used to actually create the connection pools handed out by :meth:`connection_from_url` and companion methods. It is intended to be overridden for customization. """ pool_cls = self.pool_classes_by_scheme[scheme] if request_context is None: request_context = self.connection_pool_kw.copy() # Although the context has everything necessary to create the pool, # this function has historically only used the scheme, host, and port # in the positional args. When an API change is acceptable these can # be removed. for key in ("scheme", "host", "port"): request_context.pop(key, None) if scheme == "http": for kw in SSL_KEYWORDS: request_context.pop(kw, None) return pool_cls(host, port, **request_context)