330 lines
9.7 KiB
Python
330 lines
9.7 KiB
Python
"""
|
|
Handlers for Content-Encoding.
|
|
|
|
See: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import codecs
|
|
import io
|
|
import typing
|
|
import zlib
|
|
|
|
from ._compat import brotli
|
|
from ._exceptions import DecodingError
|
|
|
|
|
|
class ContentDecoder:
|
|
def decode(self, data: bytes) -> bytes:
|
|
raise NotImplementedError() # pragma: no cover
|
|
|
|
def flush(self) -> bytes:
|
|
raise NotImplementedError() # pragma: no cover
|
|
|
|
|
|
class IdentityDecoder(ContentDecoder):
|
|
"""
|
|
Handle unencoded data.
|
|
"""
|
|
|
|
def decode(self, data: bytes) -> bytes:
|
|
return data
|
|
|
|
def flush(self) -> bytes:
|
|
return b""
|
|
|
|
|
|
class DeflateDecoder(ContentDecoder):
|
|
"""
|
|
Handle 'deflate' decoding.
|
|
|
|
See: https://stackoverflow.com/questions/1838699
|
|
"""
|
|
|
|
def __init__(self) -> None:
|
|
self.first_attempt = True
|
|
self.decompressor = zlib.decompressobj()
|
|
|
|
def decode(self, data: bytes) -> bytes:
|
|
was_first_attempt = self.first_attempt
|
|
self.first_attempt = False
|
|
try:
|
|
return self.decompressor.decompress(data)
|
|
except zlib.error as exc:
|
|
if was_first_attempt:
|
|
self.decompressor = zlib.decompressobj(-zlib.MAX_WBITS)
|
|
return self.decode(data)
|
|
raise DecodingError(str(exc)) from exc
|
|
|
|
def flush(self) -> bytes:
|
|
try:
|
|
return self.decompressor.flush()
|
|
except zlib.error as exc: # pragma: no cover
|
|
raise DecodingError(str(exc)) from exc
|
|
|
|
|
|
class GZipDecoder(ContentDecoder):
|
|
"""
|
|
Handle 'gzip' decoding.
|
|
|
|
See: https://stackoverflow.com/questions/1838699
|
|
"""
|
|
|
|
def __init__(self) -> None:
|
|
self.decompressor = zlib.decompressobj(zlib.MAX_WBITS | 16)
|
|
|
|
def decode(self, data: bytes) -> bytes:
|
|
try:
|
|
return self.decompressor.decompress(data)
|
|
except zlib.error as exc:
|
|
raise DecodingError(str(exc)) from exc
|
|
|
|
def flush(self) -> bytes:
|
|
try:
|
|
return self.decompressor.flush()
|
|
except zlib.error as exc: # pragma: no cover
|
|
raise DecodingError(str(exc)) from exc
|
|
|
|
|
|
class BrotliDecoder(ContentDecoder):
|
|
"""
|
|
Handle 'brotli' decoding.
|
|
|
|
Requires `pip install brotlipy`. See: https://brotlipy.readthedocs.io/
|
|
or `pip install brotli`. See https://github.com/google/brotli
|
|
Supports both 'brotlipy' and 'Brotli' packages since they share an import
|
|
name. The top branches are for 'brotlipy' and bottom branches for 'Brotli'
|
|
"""
|
|
|
|
def __init__(self) -> None:
|
|
if brotli is None: # pragma: no cover
|
|
raise ImportError(
|
|
"Using 'BrotliDecoder', but neither of the 'brotlicffi' or 'brotli' "
|
|
"packages have been installed. "
|
|
"Make sure to install httpx using `pip install httpx[brotli]`."
|
|
) from None
|
|
|
|
self.decompressor = brotli.Decompressor()
|
|
self.seen_data = False
|
|
self._decompress: typing.Callable[[bytes], bytes]
|
|
if hasattr(self.decompressor, "decompress"):
|
|
# The 'brotlicffi' package.
|
|
self._decompress = self.decompressor.decompress # pragma: no cover
|
|
else:
|
|
# The 'brotli' package.
|
|
self._decompress = self.decompressor.process # pragma: no cover
|
|
|
|
def decode(self, data: bytes) -> bytes:
|
|
if not data:
|
|
return b""
|
|
self.seen_data = True
|
|
try:
|
|
return self._decompress(data)
|
|
except brotli.error as exc:
|
|
raise DecodingError(str(exc)) from exc
|
|
|
|
def flush(self) -> bytes:
|
|
if not self.seen_data:
|
|
return b""
|
|
try:
|
|
if hasattr(self.decompressor, "finish"):
|
|
# Only available in the 'brotlicffi' package.
|
|
|
|
# As the decompressor decompresses eagerly, this
|
|
# will never actually emit any data. However, it will potentially throw
|
|
# errors if a truncated or damaged data stream has been used.
|
|
self.decompressor.finish() # pragma: no cover
|
|
return b""
|
|
except brotli.error as exc: # pragma: no cover
|
|
raise DecodingError(str(exc)) from exc
|
|
|
|
|
|
class MultiDecoder(ContentDecoder):
|
|
"""
|
|
Handle the case where multiple encodings have been applied.
|
|
"""
|
|
|
|
def __init__(self, children: typing.Sequence[ContentDecoder]) -> None:
|
|
"""
|
|
'children' should be a sequence of decoders in the order in which
|
|
each was applied.
|
|
"""
|
|
# Note that we reverse the order for decoding.
|
|
self.children = list(reversed(children))
|
|
|
|
def decode(self, data: bytes) -> bytes:
|
|
for child in self.children:
|
|
data = child.decode(data)
|
|
return data
|
|
|
|
def flush(self) -> bytes:
|
|
data = b""
|
|
for child in self.children:
|
|
data = child.decode(data) + child.flush()
|
|
return data
|
|
|
|
|
|
class ByteChunker:
|
|
"""
|
|
Handles returning byte content in fixed-size chunks.
|
|
"""
|
|
|
|
def __init__(self, chunk_size: int | None = None) -> None:
|
|
self._buffer = io.BytesIO()
|
|
self._chunk_size = chunk_size
|
|
|
|
def decode(self, content: bytes) -> list[bytes]:
|
|
if self._chunk_size is None:
|
|
return [content] if content else []
|
|
|
|
self._buffer.write(content)
|
|
if self._buffer.tell() >= self._chunk_size:
|
|
value = self._buffer.getvalue()
|
|
chunks = [
|
|
value[i : i + self._chunk_size]
|
|
for i in range(0, len(value), self._chunk_size)
|
|
]
|
|
if len(chunks[-1]) == self._chunk_size:
|
|
self._buffer.seek(0)
|
|
self._buffer.truncate()
|
|
return chunks
|
|
else:
|
|
self._buffer.seek(0)
|
|
self._buffer.write(chunks[-1])
|
|
self._buffer.truncate()
|
|
return chunks[:-1]
|
|
else:
|
|
return []
|
|
|
|
def flush(self) -> list[bytes]:
|
|
value = self._buffer.getvalue()
|
|
self._buffer.seek(0)
|
|
self._buffer.truncate()
|
|
return [value] if value else []
|
|
|
|
|
|
class TextChunker:
|
|
"""
|
|
Handles returning text content in fixed-size chunks.
|
|
"""
|
|
|
|
def __init__(self, chunk_size: int | None = None) -> None:
|
|
self._buffer = io.StringIO()
|
|
self._chunk_size = chunk_size
|
|
|
|
def decode(self, content: str) -> list[str]:
|
|
if self._chunk_size is None:
|
|
return [content] if content else []
|
|
|
|
self._buffer.write(content)
|
|
if self._buffer.tell() >= self._chunk_size:
|
|
value = self._buffer.getvalue()
|
|
chunks = [
|
|
value[i : i + self._chunk_size]
|
|
for i in range(0, len(value), self._chunk_size)
|
|
]
|
|
if len(chunks[-1]) == self._chunk_size:
|
|
self._buffer.seek(0)
|
|
self._buffer.truncate()
|
|
return chunks
|
|
else:
|
|
self._buffer.seek(0)
|
|
self._buffer.write(chunks[-1])
|
|
self._buffer.truncate()
|
|
return chunks[:-1]
|
|
else:
|
|
return []
|
|
|
|
def flush(self) -> list[str]:
|
|
value = self._buffer.getvalue()
|
|
self._buffer.seek(0)
|
|
self._buffer.truncate()
|
|
return [value] if value else []
|
|
|
|
|
|
class TextDecoder:
|
|
"""
|
|
Handles incrementally decoding bytes into text
|
|
"""
|
|
|
|
def __init__(self, encoding: str = "utf-8") -> None:
|
|
self.decoder = codecs.getincrementaldecoder(encoding)(errors="replace")
|
|
|
|
def decode(self, data: bytes) -> str:
|
|
return self.decoder.decode(data)
|
|
|
|
def flush(self) -> str:
|
|
return self.decoder.decode(b"", True)
|
|
|
|
|
|
class LineDecoder:
|
|
"""
|
|
Handles incrementally reading lines from text.
|
|
|
|
Has the same behaviour as the stdllib splitlines,
|
|
but handling the input iteratively.
|
|
"""
|
|
|
|
def __init__(self) -> None:
|
|
self.buffer: list[str] = []
|
|
self.trailing_cr: bool = False
|
|
|
|
def decode(self, text: str) -> list[str]:
|
|
# See https://docs.python.org/3/library/stdtypes.html#str.splitlines
|
|
NEWLINE_CHARS = "\n\r\x0b\x0c\x1c\x1d\x1e\x85\u2028\u2029"
|
|
|
|
# We always push a trailing `\r` into the next decode iteration.
|
|
if self.trailing_cr:
|
|
text = "\r" + text
|
|
self.trailing_cr = False
|
|
if text.endswith("\r"):
|
|
self.trailing_cr = True
|
|
text = text[:-1]
|
|
|
|
if not text:
|
|
# NOTE: the edge case input of empty text doesn't occur in practice,
|
|
# because other httpx internals filter out this value
|
|
return [] # pragma: no cover
|
|
|
|
trailing_newline = text[-1] in NEWLINE_CHARS
|
|
lines = text.splitlines()
|
|
|
|
if len(lines) == 1 and not trailing_newline:
|
|
# No new lines, buffer the input and continue.
|
|
self.buffer.append(lines[0])
|
|
return []
|
|
|
|
if self.buffer:
|
|
# Include any existing buffer in the first portion of the
|
|
# splitlines result.
|
|
lines = ["".join(self.buffer) + lines[0]] + lines[1:]
|
|
self.buffer = []
|
|
|
|
if not trailing_newline:
|
|
# If the last segment of splitlines is not newline terminated,
|
|
# then drop it from our output and start a new buffer.
|
|
self.buffer = [lines.pop()]
|
|
|
|
return lines
|
|
|
|
def flush(self) -> list[str]:
|
|
if not self.buffer and not self.trailing_cr:
|
|
return []
|
|
|
|
lines = ["".join(self.buffer)]
|
|
self.buffer = []
|
|
self.trailing_cr = False
|
|
return lines
|
|
|
|
|
|
SUPPORTED_DECODERS = {
|
|
"identity": IdentityDecoder,
|
|
"gzip": GZipDecoder,
|
|
"deflate": DeflateDecoder,
|
|
"br": BrotliDecoder,
|
|
}
|
|
|
|
|
|
if brotli is None:
|
|
SUPPORTED_DECODERS.pop("br") # pragma: no cover
|