utils.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  1. from __future__ import annotations
  2. import re
  3. import typing as t
  4. from urllib.parse import quote
  5. from .._internal import _plain_int
  6. from ..exceptions import SecurityError
  7. from ..http import parse_set_header
  8. from ..urls import uri_to_iri
  9. _host_re = re.compile(
  10. r"""
  11. (
  12. [a-z0-9.-]+ # domain or ipv4
  13. |
  14. \[[a-f0-9]*:[a-f0-9.:]+] # ipv6
  15. )
  16. (?::[0-9]+)? # optional port
  17. """,
  18. flags=re.ASCII | re.IGNORECASE | re.VERBOSE,
  19. )
  20. def host_is_trusted(
  21. hostname: str | None, trusted_list: t.Collection[str] | None = None
  22. ) -> bool:
  23. """Perform some checks on a ``Host`` header ``host:port``. The host must be
  24. made up of valid characters, but this does not check validity beyond that.
  25. If a list of trusted domains is given, the domain must match one.
  26. :param hostname: The ``Host`` header ``host:port`` to check.
  27. :param trusted_list: A list of trusted domains to match. These should
  28. already be IDNA encoded, but will be encoded if needed. The port is
  29. ignored for this check. If a name starts with a dot it will match as a
  30. suffix, accepting all subdomains. If empty or ``None``, all domains are
  31. allowed.
  32. .. versionchanged:: 3.2
  33. The value's characters are validated.
  34. .. versionchanged:: 3.2
  35. ``trusted_list`` defaults to ``None``.
  36. .. versionadded:: 0.9
  37. """
  38. if not hostname:
  39. return False
  40. if _host_re.fullmatch(hostname) is None:
  41. return False
  42. hostname = hostname.partition(":")[0]
  43. if not trusted_list:
  44. return True
  45. if isinstance(trusted_list, str):
  46. trusted_list = [trusted_list]
  47. for ref in trusted_list:
  48. if ref.startswith("."):
  49. ref = ref[1:]
  50. suffix_match = True
  51. else:
  52. suffix_match = False
  53. try:
  54. ref = ref.partition(":")[0].encode("idna").decode("ascii")
  55. except UnicodeEncodeError:
  56. return False
  57. if ref == hostname or (suffix_match and hostname.endswith(f".{ref}")):
  58. return True
  59. return False
  60. def get_host(
  61. scheme: str,
  62. host_header: str | None,
  63. server: tuple[str, int | None] | None = None,
  64. trusted_hosts: t.Collection[str] | None = None,
  65. ) -> str:
  66. """Get and validate a request's ``host:port`` based on the given values.
  67. The ``Host`` header sent by the client is preferred. Otherwise, the server's
  68. configured address is used. The port is omitted if it matches the standard
  69. HTTP or HTTPS ports.
  70. The value is passed through :func:`host_is_trusted`. The host must be made
  71. up of valid characters, but this does not check validity beyond that. If a
  72. list of trusted domains is given, the domain must match one.
  73. :param scheme: The protocol of the request. Used to omit the standard ports
  74. 80 and 443.
  75. :param host_header: The ``Host`` header value.
  76. :param server: The server's configured address ``(host, port)``. The server
  77. may be using a Unix socket and give ``(path, None)``; this is ignored as
  78. it would not produce a useful host value.
  79. :param trusted_hosts: A list of trusted domains to match. These should
  80. already be IDNA encoded, but will be encoded if needed. The port is
  81. ignored for this check. If a name starts with a dot it will match as a
  82. suffix, accepting all subdomains. If empty or ``None``, all domains are
  83. allowed.
  84. :return: Host, with port if necessary.
  85. :raise .SecurityError: If the host is not trusted.
  86. .. versionchanged:: 3.2
  87. The characters of the host value are validated. The empty string is no
  88. longer allowed if no header value is available.
  89. .. versionchanged:: 3.2
  90. When using the server address, Unix sockets are ignored.
  91. .. versionchanged:: 3.1.3
  92. If ``SERVER_NAME`` is IPv6, it is wrapped in ``[]``.
  93. """
  94. if host_header is not None:
  95. host = host_header
  96. # The port server[1] will be None for a Unix socket. Ignore in that case.
  97. elif server is not None and server[1] is not None:
  98. host = server[0]
  99. # If SERVER_NAME is IPv6, wrap it in [] to match Host header.
  100. # Check for : because domain or IPv4 can't have that.
  101. if ":" in host and host[0] != "[":
  102. host = f"[{host}]"
  103. host = f"{host}:{server[1]}"
  104. else:
  105. host = ""
  106. if scheme in {"http", "ws"}:
  107. host = host.removesuffix(":80")
  108. elif scheme in {"https", "wss"}:
  109. host = host.removesuffix(":443")
  110. if not host_is_trusted(host, trusted_hosts):
  111. raise SecurityError(f"Host {host!r} is not trusted.")
  112. return host
  113. def get_current_url(
  114. scheme: str,
  115. host: str,
  116. root_path: str | None = None,
  117. path: str | None = None,
  118. query_string: bytes | None = None,
  119. ) -> str:
  120. """Recreate the URL for a request. If an optional part isn't
  121. provided, it and subsequent parts are not included in the URL.
  122. The URL is an IRI, not a URI, so it may contain Unicode characters.
  123. Use :func:`~werkzeug.urls.iri_to_uri` to convert it to ASCII.
  124. :param scheme: The protocol the request used, like ``"https"``.
  125. :param host: The host the request was made to. See :func:`get_host`.
  126. :param root_path: Prefix that the application is mounted under. This
  127. is prepended to ``path``.
  128. :param path: The path part of the URL after ``root_path``.
  129. :param query_string: The portion of the URL after the "?".
  130. """
  131. url = [scheme, "://", host]
  132. if root_path is None:
  133. url.append("/")
  134. return uri_to_iri("".join(url))
  135. # safe = https://url.spec.whatwg.org/#url-path-segment-string
  136. # as well as percent for things that are already quoted
  137. url.append(quote(root_path.rstrip("/"), safe="!$&'()*+,/:;=@%"))
  138. url.append("/")
  139. if path is None:
  140. return uri_to_iri("".join(url))
  141. url.append(quote(path.lstrip("/"), safe="!$&'()*+,/:;=@%"))
  142. if query_string:
  143. url.append("?")
  144. url.append(quote(query_string, safe="!$&'()*+,/:;=?@%"))
  145. return uri_to_iri("".join(url))
  146. def get_content_length(
  147. http_content_length: str | None = None,
  148. http_transfer_encoding: str | None = None,
  149. ) -> int | None:
  150. """Return the ``Content-Length`` header value as an int. If the header is not given
  151. or the ``Transfer-Encoding`` header is ``chunked``, ``None`` is returned to indicate
  152. a streaming request. If the value is not an integer, or negative, 0 is returned.
  153. :param http_content_length: The Content-Length HTTP header.
  154. :param http_transfer_encoding: The Transfer-Encoding HTTP header.
  155. .. versionadded:: 2.2
  156. """
  157. if (
  158. http_transfer_encoding is not None
  159. and "chunked" in parse_set_header(http_transfer_encoding)
  160. ) or http_content_length is None:
  161. return None
  162. try:
  163. return max(0, _plain_int(http_content_length))
  164. except ValueError:
  165. return 0