| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
7037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120 |
- import asyncio
- import enum
- import io
- import json
- import mimetypes
- import os
- import sys
- import warnings
- from abc import ABC, abstractmethod
- from collections.abc import Iterable
- from itertools import chain
- from typing import (
- IO,
- TYPE_CHECKING,
- Any,
- Dict,
- Final,
- List,
- Optional,
- Set,
- TextIO,
- Tuple,
- Type,
- Union,
- )
- from multidict import CIMultiDict
- from . import hdrs
- from .abc import AbstractStreamWriter
- from .helpers import (
- _SENTINEL,
- content_disposition_header,
- guess_filename,
- parse_mimetype,
- sentinel,
- )
- from .streams import StreamReader
- from .typedefs import JSONEncoder, _CIMultiDict
# Public names exported by a star-import of this module.
__all__ = (
    "PAYLOAD_REGISTRY",
    "get_payload",
    "payload_type",
    "Payload",
    "BytesPayload",
    "StringPayload",
    "IOBasePayload",
    "BytesIOPayload",
    "BufferedReaderPayload",
    "TextIOPayload",
    "StringIOPayload",
    "JsonPayload",
    "AsyncIterablePayload",
)

# Body size above which BytesPayload emits a ResourceWarning.
TOO_LARGE_BYTES_BODY: Final[int] = 2**20  # 1 MB
# Default chunk size used when reading from file-like payloads.
READ_SIZE: Final[int] = 2**16  # 64 KB
# Strong references to in-flight executor futures that close file objects
# (populated by IOBasePayload._close) so they are not garbage collected
# before they complete.
_CLOSE_FUTURES: Set[asyncio.Future[None]] = set()
class LookupError(Exception):
    """Signals that the registry has no payload factory for the given data."""
class Order(str, enum.Enum):
    """Priority bucket under which a payload factory is registered."""

    normal = "normal"
    try_first = "try_first"
    try_last = "try_last"
def get_payload(data: Any, *args: Any, **kwargs: Any) -> "Payload":
    """Look up a payload for *data* in the module-level ``PAYLOAD_REGISTRY``.

    Any extra positional and keyword arguments are forwarded unchanged to
    the registry's ``get`` method.
    """
    registry = PAYLOAD_REGISTRY
    return registry.get(data, *args, **kwargs)
def register_payload(
    factory: Type["Payload"], type: Any, *, order: Order = Order.normal
) -> None:
    """Register *factory* for *type* in the module-level ``PAYLOAD_REGISTRY``.

    *order* selects the priority bucket the registration goes into.
    """
    registry = PAYLOAD_REGISTRY
    registry.register(factory, type, order=order)
class payload_type:
    """Decorator that registers the decorated payload factory for a type.

    The instance remembers the target type and ordering; calling it with a
    factory registers that factory and returns it unchanged.
    """

    def __init__(self, type: Any, *, order: Order = Order.normal) -> None:
        self.type, self.order = type, order

    def __call__(self, factory: Type["Payload"]) -> Type["Payload"]:
        target, priority = self.type, self.order
        register_payload(factory, target, order=priority)
        return factory
PayloadType = Type["Payload"]
_PayloadRegistryItem = Tuple[PayloadType, Any]


class PayloadRegistry:
    """Registry that maps data types to payload factories.

    Registrations are kept in three buckets (try-first, normal, try-last).
    Normal registrations are additionally indexed by exact type so the
    common case can skip the linear ``isinstance`` scan.

    note: we need zope.interface for more efficient adapter search
    """

    __slots__ = ("_first", "_normal", "_last", "_normal_lookup")

    def __init__(self) -> None:
        self._first: List[_PayloadRegistryItem] = []
        self._normal: List[_PayloadRegistryItem] = []
        self._last: List[_PayloadRegistryItem] = []
        # Exact-type index over the "normal" registrations.
        self._normal_lookup: Dict[Any, PayloadType] = {}

    def get(
        self,
        data: Any,
        *args: Any,
        _CHAIN: "Type[chain[_PayloadRegistryItem]]" = chain,
        **kwargs: Any,
    ) -> "Payload":
        """Return a payload wrapping *data*.

        Resolution order: try-first registrations (``isinstance``), exact
        type match among normal registrations, *data* itself when it is
        already a ``Payload``, then a linear ``isinstance`` scan over the
        normal and try-last registrations.

        Raises:
            LookupError: if nothing matches.
        """
        # Highest-priority registrations always win.
        for candidate, accepted in self._first:
            if isinstance(data, accepted):
                return candidate(data, *args, **kwargs)

        # Fast path: exact type hit in the lookup table.
        exact = self._normal_lookup.get(type(data))
        if exact:
            return exact(data, *args, **kwargs)

        # Already a payload: nothing to wrap.
        if isinstance(data, Payload):
            return data

        # Slow path: subclass-aware linear search.
        for candidate, accepted in _CHAIN(self._normal, self._last):
            if isinstance(data, accepted):
                return candidate(data, *args, **kwargs)

        raise LookupError()

    def register(
        self, factory: PayloadType, type: Any, *, order: Order = Order.normal
    ) -> None:
        """Register *factory* for *type* in the bucket selected by *order*.

        Raises:
            ValueError: if *order* is not one of the ``Order`` members.
        """
        entry = (factory, type)
        if order is Order.try_first:
            self._first.append(entry)
            return
        if order is Order.try_last:
            self._last.append(entry)
            return
        if order is not Order.normal:
            raise ValueError(f"Unsupported order {order!r}")

        self._normal.append(entry)
        # *type* may be a single type or an iterable of types; index each one.
        exact_types = type if isinstance(type, Iterable) else (type,)
        for t in exact_types:
            self._normal_lookup[t] = factory
class Payload(ABC):
    """Abstract base class for body payloads written to a stream writer.

    A payload bundles a value with its headers (``Content-Type`` is always
    set), an optional filename and an optional text encoding. Subclasses
    must implement :meth:`decode` and :meth:`write`.
    """

    _default_content_type: str = "application/octet-stream"
    _size: Optional[int] = None
    _consumed: bool = False  # Default: payload has not been consumed yet
    _autoclose: bool = False  # Default: assume resource needs explicit closing

    def __init__(
        self,
        value: Any,
        headers: Optional[
            Union[_CIMultiDict, Dict[str, str], Iterable[Tuple[str, str]]]
        ] = None,
        content_type: Union[str, None, _SENTINEL] = sentinel,
        filename: Optional[str] = None,
        encoding: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Store the value and compute the initial headers.

        ``Content-Type`` is taken from *content_type* when given; otherwise
        it is guessed from *filename*, falling back to the class default.
        *headers*, when provided, are applied last and may override it.
        """
        self._encoding = encoding
        self._filename = filename
        self._headers: _CIMultiDict = CIMultiDict()
        self._value = value
        if content_type is not sentinel and content_type is not None:
            self._headers[hdrs.CONTENT_TYPE] = content_type
        elif self._filename is not None:
            # guess_file_type only exists on Python 3.13+.
            if sys.version_info >= (3, 13):
                guesser = mimetypes.guess_file_type
            else:
                guesser = mimetypes.guess_type
            content_type = guesser(self._filename)[0]
            if content_type is None:
                content_type = self._default_content_type
            self._headers[hdrs.CONTENT_TYPE] = content_type
        else:
            self._headers[hdrs.CONTENT_TYPE] = self._default_content_type
        if headers:
            self._headers.update(headers)

    @property
    def size(self) -> Optional[int]:
        """Size of the payload in bytes.

        Returns the number of bytes that will be transmitted when the payload
        is written. For string payloads, this is the size after encoding to
        bytes, not the length of the string.
        """
        return self._size

    @property
    def filename(self) -> Optional[str]:
        """Filename of the payload."""
        return self._filename

    @property
    def headers(self) -> _CIMultiDict:
        """Custom item headers"""
        return self._headers

    @property
    def _binary_headers(self) -> bytes:
        """Headers serialized as ``Name: value`` lines plus a blank line."""
        return (
            "".join([k + ": " + v + "\r\n" for k, v in self.headers.items()]).encode(
                "utf-8"
            )
            + b"\r\n"
        )

    @property
    def encoding(self) -> Optional[str]:
        """Payload encoding"""
        return self._encoding

    @property
    def content_type(self) -> str:
        """Content type"""
        return self._headers[hdrs.CONTENT_TYPE]

    @property
    def consumed(self) -> bool:
        """Whether the payload has been consumed and cannot be reused."""
        return self._consumed

    @property
    def autoclose(self) -> bool:
        """
        Whether the payload can close itself automatically.

        Returns True if the payload has no file handles or resources that need
        explicit closing. If False, callers must await close() to release
        resources.
        """
        return self._autoclose

    def set_content_disposition(
        self,
        disptype: str,
        quote_fields: bool = True,
        _charset: str = "utf-8",
        **params: Any,
    ) -> None:
        """Sets ``Content-Disposition`` header."""
        self._headers[hdrs.CONTENT_DISPOSITION] = content_disposition_header(
            disptype, quote_fields=quote_fields, _charset=_charset, **params
        )

    @abstractmethod
    def decode(self, encoding: str = "utf-8", errors: str = "strict") -> str:
        """
        Return string representation of the value.

        This is named decode() to allow compatibility with bytes objects.
        """

    @abstractmethod
    async def write(self, writer: AbstractStreamWriter) -> None:
        """
        Write payload to the writer stream.

        Args:
            writer: An AbstractStreamWriter instance that handles the actual writing

        This is a legacy method that writes the entire payload without length
        constraints.

        Important:
            For new implementations, use write_with_length() instead of this
            method. This method is maintained for backwards compatibility and
            will eventually delegate to write_with_length(writer, None) in all
            implementations.

            All payload subclasses must override this method for backwards
            compatibility, but new code should use write_with_length for more
            flexibility and control.
        """

    # write_with_length is new in aiohttp 3.12
    # it should be overridden by subclasses
    async def write_with_length(
        self, writer: AbstractStreamWriter, content_length: Optional[int]
    ) -> None:
        """
        Write payload with a specific content length constraint.

        Args:
            writer: An AbstractStreamWriter instance that handles the actual writing
            content_length: Maximum number of bytes to write (None for unlimited)

        Note:
            This base implementation ignores *content_length* and simply calls
            write(); it exists for backwards compatibility with subclasses
            that don't override this method. Specific payload types should
            override it to implement proper length-constrained writing.
        """
        # Backwards compatibility for subclasses that don't override this method
        # and for the default implementation
        await self.write(writer)

    async def as_bytes(self, encoding: str = "utf-8", errors: str = "strict") -> bytes:
        """
        Return bytes representation of the value.

        This is a convenience method that calls decode() and encodes the result
        to bytes. The instance encoding, when set, takes precedence over the
        *encoding* argument. *errors* is applied to both the decode and the
        encode step.
        """
        # Use instance encoding if available, otherwise use parameter
        actual_encoding = self._encoding or encoding
        # Forward *errors* to encode() as well: a non-strict handler such as
        # "replace" can yield characters from decode() that the codec cannot
        # encode, which would otherwise raise under the default strict handler.
        return self.decode(actual_encoding, errors).encode(actual_encoding, errors)

    def _close(self) -> None:
        """
        Async safe synchronous close operations for backwards compatibility.

        This method exists only for backwards compatibility with code that
        needs to clean up payloads synchronously. In the future, we will
        drop this method and only support the async close() method.

        WARNING: This method must be safe to call from within the event loop
        without blocking. Subclasses should not perform any blocking I/O here.

        WARNING: This method must be called from within an event loop for
        certain payload types (e.g., IOBasePayload). Calling it outside an
        event loop may raise RuntimeError.
        """
        # This is a no-op by default, but subclasses can override it
        # for non-blocking cleanup operations.

    async def close(self) -> None:
        """
        Close the payload if it holds any resources.

        IMPORTANT: This method must not await anything that might not finish
        immediately, as it may be called during cleanup/cancellation. Schedule
        any long-running operations without awaiting them.

        In the future, this will be the only close method supported.
        """
        self._close()
class BytesPayload(Payload):
    """Payload backed by an in-memory bytes-like object."""

    _value: bytes
    # _consumed = False (inherited) - Bytes are immutable and can be reused
    _autoclose = True  # No file handle, just bytes in memory

    def __init__(
        self, value: Union[bytes, bytearray, memoryview], *args: Any, **kwargs: Any
    ) -> None:
        """Wrap *value* and record its size in bytes.

        Raises:
            TypeError: if *value* is not bytes, bytearray or memoryview.

        Emits a ResourceWarning when the body exceeds TOO_LARGE_BYTES_BODY.
        """
        if "content_type" not in kwargs:
            kwargs["content_type"] = "application/octet-stream"

        super().__init__(value, *args, **kwargs)

        if isinstance(value, memoryview):
            # len() of a memoryview counts items, not bytes.
            self._size = value.nbytes
        elif isinstance(value, (bytes, bytearray)):
            self._size = len(value)
        else:
            raise TypeError(f"value argument must be byte-ish, not {type(value)!r}")

        if self._size > TOO_LARGE_BYTES_BODY:
            warnings.warn(
                "Sending a large body directly with raw bytes might"
                " lock the event loop. You should probably pass an "
                "io.BytesIO object instead",
                ResourceWarning,
                source=self,
            )

    def decode(self, encoding: str = "utf-8", errors: str = "strict") -> str:
        """Return the payload decoded to text.

        Works for every value type the constructor accepts, including
        memoryview (which has no ``decode`` method of its own).
        """
        # str(obj, encoding, errors) decodes any bytes-like object.
        return str(self._value, encoding, errors)

    async def as_bytes(self, encoding: str = "utf-8", errors: str = "strict") -> bytes:
        """
        Return bytes representation of the value.

        This method returns the raw bytes content of the payload.
        It is equivalent to accessing the _value attribute directly.
        """
        return self._value

    async def write(self, writer: AbstractStreamWriter) -> None:
        """
        Write the entire bytes payload to the writer stream.

        Args:
            writer: An AbstractStreamWriter instance that handles the actual writing

        This method writes the entire bytes content without any length
        constraint.

        Note:
            For new implementations that need length control, use
            write_with_length(). This method is maintained for backwards
            compatibility and is equivalent to write_with_length(writer, None).
        """
        await writer.write(self._value)

    async def write_with_length(
        self, writer: AbstractStreamWriter, content_length: Optional[int]
    ) -> None:
        """
        Write bytes payload with a specific content length constraint.

        Args:
            writer: An AbstractStreamWriter instance that handles the actual writing
            content_length: Maximum number of bytes to write (None for unlimited)

        Writes either the entire value or a slice of it limited to
        *content_length*.
        """
        if content_length is not None:
            await writer.write(self._value[:content_length])
        else:
            await writer.write(self._value)
class StringPayload(BytesPayload):
    """Bytes payload built by encoding a ``str``."""

    def __init__(
        self,
        value: str,
        *args: Any,
        encoding: Optional[str] = None,
        content_type: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Encode *value* and derive encoding / content type from each other.

        An explicit *encoding* wins; otherwise the ``charset`` parameter of
        *content_type* is used, defaulting to UTF-8. A missing *content_type*
        becomes ``text/plain`` with the chosen charset.
        """
        if encoding is not None:
            real_encoding = encoding
            if content_type is None:
                content_type = "text/plain; charset=%s" % encoding
        elif content_type is not None:
            parsed = parse_mimetype(content_type)
            real_encoding = parsed.parameters.get("charset", "utf-8")
        else:
            real_encoding = "utf-8"
            content_type = "text/plain; charset=utf-8"

        encoded = value.encode(real_encoding)
        super().__init__(
            encoded,
            *args,
            encoding=real_encoding,
            content_type=content_type,
            **kwargs,
        )
class StringIOPayload(StringPayload):
    """String payload whose text is read eagerly from a text stream."""

    def __init__(self, value: IO[str], *args: Any, **kwargs: Any) -> None:
        # The stream is read in full at construction time.
        text = value.read()
        super().__init__(text, *args, **kwargs)
class IOBasePayload(Payload):
    """Payload that streams its content from a file-like object."""

    _value: io.IOBase
    # _consumed = False (inherited) - File can be re-read from the same position
    _start_position: Optional[int] = None
    # _autoclose = False (inherited) - Has file handle that needs explicit closing

    def __init__(
        self, value: IO[Any], disposition: str = "attachment", *args: Any, **kwargs: Any
    ) -> None:
        """Wrap *value*, guessing the filename when none is supplied.

        When a filename is known and *disposition* is not None, a
        ``Content-Disposition`` header is added unless one is already present.
        """
        if "filename" not in kwargs:
            kwargs["filename"] = guess_filename(value)

        super().__init__(value, *args, **kwargs)

        wants_disposition = self._filename is not None and disposition is not None
        if wants_disposition and hdrs.CONTENT_DISPOSITION not in self.headers:
            self.set_content_disposition(disposition, filename=self._filename)

    def _set_or_restore_start_position(self) -> None:
        """Set or restore the start position of the file-like object."""
        if self._start_position is None:
            # First use: remember where reading starts.
            try:
                self._start_position = self._value.tell()
            except (OSError, AttributeError):
                self._consumed = True  # Cannot seek, mark as consumed
                return
        try:
            self._value.seek(self._start_position)
        except (OSError, AttributeError):
            # Failed to seek back - mark as consumed since we've already read
            self._consumed = True

    def _read_and_available_len(
        self, remaining_content_len: Optional[int]
    ) -> Tuple[Optional[int], bytes]:
        """
        Read the file-like object and return both its total size and the first chunk.

        Args:
            remaining_content_len: Optional limit on how many bytes to read in
                this operation. If None, READ_SIZE will be used as the default
                chunk size.

        Returns:
            A tuple containing:
            - The total size of the remaining unread content (None if size
              cannot be determined)
            - The first chunk of bytes read from the file object

        Size calculation and the initial read are done together so that they
        can run in a single executor job.
        """
        self._set_or_restore_start_position()
        total = self.size  # Call size only once since it does I/O
        first_read_len = min(
            READ_SIZE, total or READ_SIZE, remaining_content_len or READ_SIZE
        )
        return total, self._value.read(first_read_len)

    def _read(self, remaining_content_len: Optional[int]) -> bytes:
        """
        Read a chunk of data from the file-like object.

        Args:
            remaining_content_len: Optional maximum number of bytes to read.
                If None, READ_SIZE will be used as the default chunk size.

        Returns:
            A chunk of bytes read from the file object.

        Used for subsequent reads during streaming, after the initial
        _read_and_available_len call has been made.
        """
        read_len = remaining_content_len or READ_SIZE
        return self._value.read(read_len)  # type: ignore[no-any-return]

    @property
    def size(self) -> Optional[int]:
        """
        Size of the payload in bytes.

        Returns the total size of the payload content from the initial
        position. This ensures consistent Content-Length for requests,
        including 307/308 redirects where the same payload instance is reused.

        Returns None if the size cannot be determined (e.g., for unseekable
        streams).
        """
        try:
            # Store the start position on first access. When the same payload
            # instance is reused (e.g. 307/308 redirects) the file position
            # would be at EOF after the first read, and computing the size
            # from the current position would yield 0. Anchoring on the
            # stored start position keeps the result stable across uses.
            if self._start_position is None:
                self._start_position = self._value.tell()

            # Total size measured from the start position.
            return os.fstat(self._value.fileno()).st_size - self._start_position
        except (AttributeError, OSError):
            return None

    async def write(self, writer: AbstractStreamWriter) -> None:
        """
        Write the entire file-like payload to the writer stream.

        Args:
            writer: An AbstractStreamWriter instance that handles the actual writing

        Delegates to write_with_length() with no length limit.

        Note:
            For new implementations that need length control, use
            write_with_length() directly. This method is maintained for
            backwards compatibility with existing code.
        """
        await self.write_with_length(writer, None)

    async def write_with_length(
        self, writer: AbstractStreamWriter, content_length: Optional[int]
    ) -> None:
        """
        Write file-like payload with a specific content length constraint.

        Args:
            writer: An AbstractStreamWriter instance that handles the actual writing
            content_length: Maximum number of bytes to write (None for unlimited)

        File reads run in the loop's default executor so the event loop is not
        blocked; content is read and written chunk by chunk. Writing stops
        when the file yields no more data, when all available content has been
        written (size known), or when *content_length* has been reached.
        """
        loop = asyncio.get_running_loop()
        written = 0
        budget = content_length

        # Get initial data and available length
        known_size, chunk = await loop.run_in_executor(
            None, self._read_and_available_len, budget
        )

        # Process data chunks until done
        while chunk:
            chunk_len = len(chunk)

            # Write data with or without length constraint
            if budget is None:
                await writer.write(chunk)
            else:
                await writer.write(chunk[:budget])
                budget -= chunk_len

            written += chunk_len

            # Check if we're done writing
            if self._should_stop_writing(known_size, written, budget):
                return

            # Read next chunk
            next_len = READ_SIZE if budget is None else min(READ_SIZE, budget)
            chunk = await loop.run_in_executor(None, self._read, next_len)

    def _should_stop_writing(
        self,
        available_len: Optional[int],
        total_written_len: int,
        remaining_content_len: Optional[int],
    ) -> bool:
        """
        Determine if we should stop writing data.

        Args:
            available_len: Known size of the payload if available (None if unknown)
            total_written_len: Number of bytes already written
            remaining_content_len: Remaining bytes to be written for
                content-length limited responses

        Returns:
            True if all available data has been written (size known) or the
            requested content length has been exhausted; False otherwise.
        """
        if available_len is not None and total_written_len >= available_len:
            return True
        return remaining_content_len is not None and remaining_content_len <= 0

    def _close(self) -> None:
        """
        Async safe synchronous close operations for backwards compatibility.

        This method exists only for backwards
        compatibility. Use the async close() method instead.

        WARNING: This method MUST be called from within an event loop.
        Calling it outside an event loop will raise RuntimeError.
        """
        # Skip if already consumed
        if self._consumed:
            return
        self._consumed = True  # Mark as consumed to prevent further writes

        # Schedule file closing without awaiting to prevent cancellation issues
        loop = asyncio.get_running_loop()
        pending_close = loop.run_in_executor(None, self._value.close)
        # Hold a strong reference to the future to prevent it from being
        # garbage collected before it completes.
        _CLOSE_FUTURES.add(pending_close)
        pending_close.add_done_callback(_CLOSE_FUTURES.remove)

    async def close(self) -> None:
        """
        Close the payload if it holds any resources.

        IMPORTANT: This method must not await anything that might not finish
        immediately, as it may be called during cleanup/cancellation. Schedule
        any long-running operations without awaiting them.
        """
        self._close()

    def decode(self, encoding: str = "utf-8", errors: str = "strict") -> str:
        """
        Return string representation of the value.

        WARNING: This method does blocking I/O and should not be called in the
        event loop.
        """
        raw = self._read_all()
        return raw.decode(encoding, errors)

    def _read_all(self) -> bytes:
        """Read the entire file-like object and return its content as bytes."""
        self._set_or_restore_start_position()
        # Use readlines() to ensure we get all content
        lines = self._value.readlines()
        return b"".join(lines)

    async def as_bytes(self, encoding: str = "utf-8", errors: str = "strict") -> bytes:
        """
        Return bytes representation of the value.

        Reads the entire file content and returns it as bytes. The read runs
        in an executor to avoid blocking the event loop.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._read_all)
class TextIOPayload(IOBasePayload):
    """File-like payload that reads text and encodes it before writing."""

    _value: io.TextIOBase
    # _autoclose = False (inherited) - Has text file handle that needs explicit closing

    def __init__(
        self,
        value: TextIO,
        *args: Any,
        encoding: Optional[str] = None,
        content_type: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Wrap *value*, deriving encoding and content type from each other.

        An explicit *encoding* wins; otherwise the ``charset`` parameter of
        *content_type* is used, defaulting to UTF-8. A missing *content_type*
        becomes ``text/plain`` with the chosen charset.
        """
        if encoding is not None:
            if content_type is None:
                content_type = "text/plain; charset=%s" % encoding
        elif content_type is not None:
            parsed = parse_mimetype(content_type)
            encoding = parsed.parameters.get("charset", "utf-8")
        else:
            encoding = "utf-8"
            content_type = "text/plain; charset=utf-8"

        super().__init__(
            value,
            *args,
            content_type=content_type,
            encoding=encoding,
            **kwargs,
        )

    def _read_and_available_len(
        self, remaining_content_len: Optional[int]
    ) -> Tuple[Optional[int], bytes]:
        """
        Read the text file-like object and return both its total size and the first chunk.

        Args:
            remaining_content_len: Optional limit on how much to read in this
                operation. If None, READ_SIZE will be used as the default
                chunk size.

        Returns:
            A tuple containing:
            - The total size of the remaining unread content (None if size
              cannot be determined)
            - The first chunk read from the file object, encoded using the
              payload's encoding

        Size calculation and the initial read are done together so that they
        can run in a single executor job.

        Note:
            The text is encoded with the payload's encoding; when no encoding
            is set, ``str.encode()``'s default (UTF-8) is used.
        """
        self._set_or_restore_start_position()
        total = self.size
        first_read_len = min(
            READ_SIZE, total or READ_SIZE, remaining_content_len or READ_SIZE
        )
        text = self._value.read(first_read_len)
        data = text.encode(self._encoding) if self._encoding else text.encode()
        return total, data

    def _read(self, remaining_content_len: Optional[int]) -> bytes:
        """
        Read a chunk of data from the text file-like object.

        Args:
            remaining_content_len: Optional maximum amount to read.
                If None, READ_SIZE will be used as the default chunk size.

        Returns:
            The chunk read from the file object, encoded to bytes using the
            payload's encoding (or UTF-8 if none was provided).

        Used for subsequent reads during streaming, after the initial
        _read_and_available_len call has been made.
        """
        text = self._value.read(remaining_content_len or READ_SIZE)
        if self._encoding:
            return text.encode(self._encoding)
        return text.encode()

    def decode(self, encoding: str = "utf-8", errors: str = "strict") -> str:
        """
        Return string representation of the value.

        The stream already yields text, so *encoding* and *errors* are unused.

        WARNING: This method does blocking I/O and should not be called in the
        event loop.
        """
        self._set_or_restore_start_position()
        return self._value.read()

    async def as_bytes(self, encoding: str = "utf-8", errors: str = "strict") -> bytes:
        """
        Return bytes representation of the value.

        Reads the entire text content and encodes it. The instance encoding,
        when set, takes precedence over *encoding*. The read runs in an
        executor to avoid blocking the event loop.
        """
        # Use instance encoding if available, otherwise use parameter
        actual_encoding = self._encoding or encoding

        def _read_and_encode() -> bytes:
            self._set_or_restore_start_position()
            # TextIO read() always returns the full content
            text = self._value.read()
            return text.encode(actual_encoding, errors)

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _read_and_encode)
- class BytesIOPayload(IOBasePayload):
- _value: io.BytesIO
- _size: int # Always initialized in __init__
- _autoclose = True # BytesIO is in-memory, safe to auto-close
- def __init__(self, value: io.BytesIO, *args: Any, **kwargs: Any) -> None:
- super().__init__(value, *args, **kwargs)
- # Calculate size once during initialization
- self._size = len(self._value.getbuffer()) - self._value.tell()
- @property
- def size(self) -> int:
- """Size of the payload in bytes.
- Returns the number of bytes in the BytesIO buffer that will be transmitted.
- This is calculated once during initialization for efficiency.
- """
- return self._size
- def decode(self, encoding: str = "utf-8", errors: str = "strict") -> str:
- self._set_or_restore_start_position()
- return self._value.read().decode(encoding, errors)
- async def write(self, writer: AbstractStreamWriter) -> None:
- return await self.write_with_length(writer, None)
- async def write_with_length(
- self, writer: AbstractStreamWriter, content_length: Optional[int]
- ) -> None:
- """
- Write BytesIO payload with a specific content length constraint.
- Args:
- writer: An AbstractStreamWriter instance that handles the actual writing
- content_length: Maximum number of bytes to write (None for unlimited)
- This implementation is specifically optimized for BytesIO objects:
- 1. Reads content in chunks to maintain memory efficiency
- 2. Yields control back to the event loop periodically to prevent blocking
- when dealing with large BytesIO objects
- 3. Respects content_length constraints when specified
- 4. Properly cleans up by closing the BytesIO object when done or on error
- The periodic yielding to the event loop is important for maintaining
- responsiveness when processing large in-memory buffers.
- """
- self._set_or_restore_start_position()
- loop_count = 0
- remaining_bytes = content_length
- while chunk := self._value.read(READ_SIZE):
- if loop_count > 0:
- # Avoid blocking the event loop
- # if they pass a large BytesIO object
- # and we are not in the first iteration
- # of the loop
- await asyncio.sleep(0)
- if remaining_bytes is None:
- await writer.write(chunk)
- else:
- await writer.write(chunk[:remaining_bytes])
- remaining_bytes -= len(chunk)
- if remaining_bytes <= 0:
- return
- loop_count += 1
async def as_bytes(self, encoding: str = "utf-8", errors: str = "strict") -> bytes:
    """
    Return the buffer content as bytes.

    The read position is set or restored first, then everything readable
    from there is returned in one piece.

    Args:
        encoding: Not used by this implementation
        errors: Not used by this implementation
    """
    self._set_or_restore_start_position()
    content = self._value.read()
    return content
async def close(self) -> None:
    """
    Close the BytesIO payload.

    Intentionally a no-op: the payload is backed by an in-memory BytesIO,
    so there is nothing that has to be released explicitly.
    """
    return None
class BufferedReaderPayload(IOBasePayload):
    """IOBasePayload variant whose value is a buffered binary stream."""

    _value: io.BufferedIOBase
    # _autoclose = False (inherited) - Has buffered file handle that needs explicit closing

    def decode(self, encoding: str = "utf-8", errors: str = "strict") -> str:
        """
        Return the stream content decoded to text.

        The read position is set or restored first; everything readable
        from there is then decoded.

        Args:
            encoding: Codec name handed to bytes.decode()
            errors: Error-handling scheme handed to bytes.decode()
        """
        self._set_or_restore_start_position()
        raw = self._value.read()
        return raw.decode(encoding, errors)
class JsonPayload(BytesPayload):
    """Bytes payload whose body is a value serialized with a JSON encoder."""

    def __init__(
        self,
        value: Any,
        encoding: str = "utf-8",
        content_type: str = "application/json",
        dumps: JSONEncoder = json.dumps,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """
        Serialize the value and initialise the underlying bytes payload.

        Args:
            value: Object to serialize
            encoding: Text encoding applied to the serialized string
            content_type: Content type forwarded to the parent initializer
            dumps: Callable that turns the value into a string
            *args: Extra positional arguments for the parent initializer
            **kwargs: Extra keyword arguments for the parent initializer
        """
        serialized = dumps(value)
        body = serialized.encode(encoding)
        super().__init__(
            body,
            *args,
            content_type=content_type,
            encoding=encoding,
            **kwargs,
        )
# Aliases used to annotate async-iterable payload values.
if TYPE_CHECKING:
    # Static type checkers see the typing generics parametrised with bytes.
    from typing import AsyncIterable, AsyncIterator

    _AsyncIterator = AsyncIterator[bytes]
    _AsyncIterable = AsyncIterable[bytes]
else:
    # At runtime the unsubscripted collections.abc classes are bound; the
    # AsyncIterable name imported here is the one used for isinstance()
    # checks and payload registration further down.
    # NOTE(review): presumably left unsubscripted for compatibility with
    # older Python versions - confirm before simplifying.
    from collections.abc import AsyncIterable, AsyncIterator

    _AsyncIterator = AsyncIterator
    _AsyncIterable = AsyncIterable
class AsyncIterablePayload(Payload):
    """
    Payload whose body is produced by an async iterable of bytes.

    The iterator can be drained only once. write_with_length() streams
    straight from it without keeping the data, while as_bytes() drains it
    and caches the chunks so later writes, decode() and as_bytes() calls
    can replay them.
    """

    _iter: Optional[_AsyncIterator] = None
    _value: _AsyncIterable
    _cached_chunks: Optional[List[bytes]] = None
    # _consumed stays False to allow reuse with cached content
    _autoclose = True  # Iterator doesn't need explicit closing

    def __init__(self, value: _AsyncIterable, *args: Any, **kwargs: Any) -> None:
        """
        Wrap an async iterable and obtain its iterator.

        Args:
            value: Object implementing collections.abc.AsyncIterable
            *args: Forwarded to the parent initializer
            **kwargs: Forwarded to the parent initializer; content_type
                defaults to "application/octet-stream" when not supplied

        Raises:
            TypeError: If value is not an AsyncIterable
        """
        if not isinstance(value, AsyncIterable):
            raise TypeError(
                "value argument must support "
                "collections.abc.AsyncIterable interface, "
                "got {!r}".format(type(value))
            )

        kwargs.setdefault("content_type", "application/octet-stream")
        super().__init__(value, *args, **kwargs)
        self._iter = value.__aiter__()

    async def write(self, writer: AbstractStreamWriter) -> None:
        """
        Write every chunk of the payload to the writer.

        Args:
            writer: An AbstractStreamWriter instance that receives the chunks

        No length limit is applied; this delegates to write_with_length()
        with None as the content length.
        """
        await self.write_with_length(writer, None)

    async def write_with_length(
        self, writer: AbstractStreamWriter, content_length: Optional[int]
    ) -> None:
        """
        Write the payload to the writer, optionally capped in length.

        Args:
            writer: An AbstractStreamWriter instance that receives the chunks
            content_length: Maximum number of bytes to write (None for unlimited)

        Cached chunks (produced by as_bytes()) are replayed when present.
        Otherwise the chunks are pulled from the iterator one by one; this
        path never builds a cache and marks the payload as consumed once
        the iterator is exhausted. If the iterator was already exhausted
        and nothing is cached, nothing is written.
        """
        budget = content_length

        # Replay path: the content has already been collected by as_bytes().
        if self._cached_chunks is not None:
            for piece in self._cached_chunks:
                if budget is None:
                    await writer.write(piece)
                    continue
                if budget <= 0:
                    break
                await writer.write(piece[:budget])
                budget -= len(piece)
            return

        # Exhausted iterator and no cache: nothing left to send.
        if self._iter is None:
            return

        # Streaming path: pull chunks until the iterator signals the end.
        try:
            while True:
                if sys.version_info >= (3, 10):
                    piece = await anext(self._iter)
                else:
                    piece = await self._iter.__anext__()
                if budget is None:
                    await writer.write(piece)
                    continue
                if budget > 0:
                    await writer.write(piece[:budget])
                    budget -= len(piece)
                # Keep pulling even after the limit has been reached:
                # the iterator may only release its file handle once it
                # has been run to completion.
        except StopAsyncIteration:
            # Iterator is exhausted
            self._iter = None
            self._consumed = True  # Mark as consumed when streamed without caching

    def decode(self, encoding: str = "utf-8", errors: str = "strict") -> str:
        """
        Decode the cached payload content to text.

        Args:
            encoding: Codec name handed to bytes.decode()
            errors: Error-handling scheme handed to bytes.decode()

        Raises:
            TypeError: If no chunks have been cached by as_bytes() yet
        """
        if self._cached_chunks is None:
            raise TypeError(
                "Unable to decode - content not cached. Call as_bytes() first."
            )
        return b"".join(self._cached_chunks).decode(encoding, errors)

    async def as_bytes(self, encoding: str = "utf-8", errors: str = "strict") -> bytes:
        """
        Return the complete payload content as bytes.

        The first call drains the iterator and caches the chunks; later
        calls join the cached chunks again. If the iterator was already
        exhausted without a cache, an empty bytes object is returned.

        Args:
            encoding: Not used by this implementation
            errors: Not used by this implementation
        """
        if self._cached_chunks is not None:
            return b"".join(self._cached_chunks)

        if self._iter is None:
            return b""

        collected: List[bytes] = [piece async for piece in self._iter]

        # The iterator is spent; keep the chunks for later reuse.
        self._iter = None
        self._cached_chunks = collected
        # Keep _consumed as False to allow reuse with cached chunks
        return b"".join(collected)
class StreamReaderPayload(AsyncIterablePayload):
    """Async-iterable payload fed from a StreamReader."""

    def __init__(self, value: StreamReader, *args: Any, **kwargs: Any) -> None:
        """
        Use the reader's iter_any() iterator as the payload source.

        Args:
            value: StreamReader whose iter_any() result is wrapped
            *args: Forwarded to the parent initializer
            **kwargs: Forwarded to the parent initializer
        """
        chunk_source = value.iter_any()
        super().__init__(chunk_source, *args, **kwargs)
# Module-level registry, populated below with a payload class for each
# supported value type.
PAYLOAD_REGISTRY = PayloadRegistry()
# In-memory bytes-like and text values.
PAYLOAD_REGISTRY.register(BytesPayload, (bytes, bytearray, memoryview))
PAYLOAD_REGISTRY.register(StringPayload, str)
# File-like objects.
# NOTE(review): the specific IO types are registered ahead of the generic
# io.TextIOBase / io.IOBase entries - presumably lookup honours
# registration order; confirm against PayloadRegistry before reordering.
PAYLOAD_REGISTRY.register(StringIOPayload, io.StringIO)
PAYLOAD_REGISTRY.register(TextIOPayload, io.TextIOBase)
PAYLOAD_REGISTRY.register(BytesIOPayload, io.BytesIO)
PAYLOAD_REGISTRY.register(BufferedReaderPayload, (io.BufferedReader, io.BufferedRandom))
PAYLOAD_REGISTRY.register(IOBasePayload, io.IOBase)
PAYLOAD_REGISTRY.register(StreamReaderPayload, StreamReader)
# try_last for giving a chance to more specialized async iterables like
# multipart.BodyPartReaderPayload override the default
PAYLOAD_REGISTRY.register(AsyncIterablePayload, AsyncIterable, order=Order.try_last)
|