| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211 |
- from __future__ import annotations
- import re
- import typing as t
- from urllib.parse import quote
- from .._internal import _plain_int
- from ..exceptions import SecurityError
- from ..http import parse_set_header
- from ..urls import uri_to_iri
- _host_re = re.compile(
- r"""
- (
- [a-z0-9.-]+ # domain or ipv4
- |
- \[[a-f0-9]*:[a-f0-9.:]+] # ipv6
- )
- (?::[0-9]+)? # optional port
- """,
- flags=re.ASCII | re.IGNORECASE | re.VERBOSE,
- )
- def host_is_trusted(
- hostname: str | None, trusted_list: t.Collection[str] | None = None
- ) -> bool:
- """Perform some checks on a ``Host`` header ``host:port``. The host must be
- made up of valid characters, but this does not check validity beyond that.
- If a list of trusted domains is given, the domain must match one.
- :param hostname: The ``Host`` header ``host:port`` to check.
- :param trusted_list: A list of trusted domains to match. These should
- already be IDNA encoded, but will be encoded if needed. The port is
- ignored for this check. If a name starts with a dot it will match as a
- suffix, accepting all subdomains. If empty or ``None``, all domains are
- allowed.
- .. versionchanged:: 3.2
- The value's characters are validated.
- .. versionchanged:: 3.2
- ``trusted_list`` defaults to ``None``.
- .. versionadded:: 0.9
- """
- if not hostname:
- return False
- if _host_re.fullmatch(hostname) is None:
- return False
- hostname = hostname.partition(":")[0]
- if not trusted_list:
- return True
- if isinstance(trusted_list, str):
- trusted_list = [trusted_list]
- for ref in trusted_list:
- if ref.startswith("."):
- ref = ref[1:]
- suffix_match = True
- else:
- suffix_match = False
- try:
- ref = ref.partition(":")[0].encode("idna").decode("ascii")
- except UnicodeEncodeError:
- return False
- if ref == hostname or (suffix_match and hostname.endswith(f".{ref}")):
- return True
- return False
def get_host(
    scheme: str,
    host_header: str | None,
    server: tuple[str, int | None] | None = None,
    trusted_hosts: t.Collection[str] | None = None,
) -> str:
    """Work out the ``host:port`` a request was made to and validate it.

    A ``Host`` header value, when present, takes precedence. Without one, the
    server's configured ``(host, port)`` address is used instead. A trailing
    port equal to the scheme's standard port (80 for ``http``/``ws``, 443 for
    ``https``/``wss``) is dropped.

    The result is checked with :func:`host_is_trusted`: it must consist of
    valid characters and, if ``trusted_hosts`` is given, match one of its
    entries.

    :param scheme: The protocol of the request. Used to omit the standard ports
        80 and 443.
    :param host_header: The ``Host`` header value.
    :param server: The server's configured address ``(host, port)``. The server
        may be using a Unix socket and give ``(path, None)``; this is ignored as
        it would not produce a useful host value.
    :param trusted_hosts: A list of trusted domains to match. These should
        already be IDNA encoded, but will be encoded if needed. The port is
        ignored for this check. If a name starts with a dot it will match as a
        suffix, accepting all subdomains. If empty or ``None``, all domains are
        allowed.
    :return: Host, with port if necessary.
    :raise .SecurityError: If the host is not trusted.

    .. versionchanged:: 3.2
        The characters of the host value are validated. The empty string is no
        longer allowed if no header value is available.

    .. versionchanged:: 3.2
        When using the server address, Unix sockets are ignored.

    .. versionchanged:: 3.1.3
        If ``SERVER_NAME`` is IPv6, it is wrapped in ``[]``.
    """
    if host_header is not None:
        host = host_header
    elif server is None or server[1] is None:
        # No header, and either no server address or a Unix socket (its port
        # is None). Nothing useful to build a host from; the empty value is
        # rejected by the trust check below.
        host = ""
    else:
        name = server[0]

        # An unbracketed name containing ":" can only be IPv6, since a domain
        # or IPv4 address can't contain one. Wrap it to match Host header form.
        if ":" in name and name[0] != "[":
            name = f"[{name}]"

        host = f"{name}:{server[1]}"

    # Standard port suffix for the scheme, if the scheme has one.
    default_port = {
        "http": ":80",
        "ws": ":80",
        "https": ":443",
        "wss": ":443",
    }.get(scheme)

    if default_port is not None:
        host = host.removesuffix(default_port)

    if host_is_trusted(host, trusted_hosts):
        return host

    raise SecurityError(f"Host {host!r} is not trusted.")
def get_current_url(
    scheme: str,
    host: str,
    root_path: str | None = None,
    path: str | None = None,
    query_string: bytes | None = None,
) -> str:
    """Rebuild the URL of a request from its parts. The optional parts are
    cumulative: once one is ``None``, it and every part after it are left out.

    The URL is an IRI, not a URI, so it may contain Unicode characters.
    Use :func:`~werkzeug.urls.iri_to_uri` to convert it to ASCII.

    :param scheme: The protocol the request used, like ``"https"``.
    :param host: The host the request was made to. See :func:`get_host`.
    :param root_path: Prefix that the application is mounted under. This
        is prepended to ``path``.
    :param path: The path part of the URL after ``root_path``.
    :param query_string: The portion of the URL after the "?".
    """
    # safe = https://url.spec.whatwg.org/#url-path-segment-string
    # as well as percent for things that are already quoted
    path_safe = "!$&'()*+,/:;=@%"
    has_root = root_path is not None
    pieces = [scheme, "://", host]

    if has_root:
        pieces.append(quote(root_path.rstrip("/"), safe=path_safe))

    # Exactly one slash always follows the host (and the root path, if any).
    pieces.append("/")

    # The path is only considered when a root path was provided, and the
    # query string only when the path was provided.
    if has_root and path is not None:
        pieces.append(quote(path.lstrip("/"), safe=path_safe))

        if query_string:
            pieces += ["?", quote(query_string, safe="!$&'()*+,/:;=?@%")]

    return uri_to_iri("".join(pieces))
- def get_content_length(
- http_content_length: str | None = None,
- http_transfer_encoding: str | None = None,
- ) -> int | None:
- """Return the ``Content-Length`` header value as an int. If the header is not given
- or the ``Transfer-Encoding`` header is ``chunked``, ``None`` is returned to indicate
- a streaming request. If the value is not an integer, or negative, 0 is returned.
- :param http_content_length: The Content-Length HTTP header.
- :param http_transfer_encoding: The Transfer-Encoding HTTP header.
- .. versionadded:: 2.2
- """
- if (
- http_transfer_encoding is not None
- and "chunked" in parse_set_header(http_transfer_encoding)
- ) or http_content_length is None:
- return None
- try:
- return max(0, _plain_int(http_content_length))
- except ValueError:
- return 0
|