_http_parser.pyx 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835
  1. # Based on https://github.com/MagicStack/httptools
  2. #
  3. from cpython cimport (
  4. Py_buffer,
  5. PyBUF_SIMPLE,
  6. PyBuffer_Release,
  7. PyBytes_AsString,
  8. PyBytes_AsStringAndSize,
  9. PyObject_GetBuffer,
  10. )
  11. from cpython.mem cimport PyMem_Free, PyMem_Malloc
  12. from libc.limits cimport ULLONG_MAX
  13. from libc.string cimport memcpy
  14. from multidict import CIMultiDict as _CIMultiDict, CIMultiDictProxy as _CIMultiDictProxy
  15. from yarl import URL as _URL
  16. from aiohttp import hdrs
  17. from aiohttp.helpers import DEBUG, set_exception
  18. from .http_exceptions import (
  19. BadHttpMessage,
  20. BadHttpMethod,
  21. BadStatusLine,
  22. ContentLengthError,
  23. InvalidHeader,
  24. InvalidURLError,
  25. LineTooLong,
  26. PayloadEncodingError,
  27. TransferEncodingError,
  28. )
  29. from .http_parser import DeflateBuffer as _DeflateBuffer
  30. from .http_writer import (
  31. HttpVersion as _HttpVersion,
  32. HttpVersion10 as _HttpVersion10,
  33. HttpVersion11 as _HttpVersion11,
  34. )
  35. from .streams import EMPTY_PAYLOAD as _EMPTY_PAYLOAD, StreamReader as _StreamReader
  36. cimport cython
  37. from aiohttp cimport _cparser as cparser
  38. include "_headers.pxi"
  39. from aiohttp cimport _find_header
  # Names exported by this extension module.
  __all__ = (
      'HttpRequestParser',
      'HttpResponseParser',
      'RawRequestMessage',
      'RawResponseMessage',
  )

  # Upgrade header values (compared lower-cased) accepted by the request parser.
  ALLOWED_UPGRADES = frozenset({"websocket"})

  # Size handed to @cython.freelist for the message classes.
  DEF DEFAULT_FREELIST_SIZE = 250

  # bytearray C-API functions, declared straight from Python.h.
  cdef extern from "Python.h":
      int PyByteArray_Resize(object, Py_ssize_t) except -1
      Py_ssize_t PyByteArray_Size(object) except -1
      char* PyByteArray_AsString(object)

  # Module-level C globals bound to the imported Python objects.
  cdef object URL = _URL
  cdef object URL_build = URL.build
  cdef object CIMultiDict = _CIMultiDict
  cdef object CIMultiDictProxy = _CIMultiDictProxy
  cdef object HttpVersion = _HttpVersion
  cdef object HttpVersion10 = _HttpVersion10
  cdef object HttpVersion11 = _HttpVersion11
  cdef object StreamReader = _StreamReader
  cdef object DeflateBuffer = _DeflateBuffer
  cdef object EMPTY_PAYLOAD = _EMPTY_PAYLOAD
  cdef object SEC_WEBSOCKET_KEY1 = hdrs.SEC_WEBSOCKET_KEY1
  cdef object CONTENT_ENCODING = hdrs.CONTENT_ENCODING

  # Shared empty-bytes object; the parser compares against it by identity
  # to tell "no header data collected yet" apart from collected data.
  cdef bytes EMPTY_BYTES = b""
  cdef inline object extend(object buf, const char* at, size_t length):
      # Append ``length`` bytes starting at ``at`` to the bytearray ``buf``.
      cdef:
          Py_ssize_t old_size = PyByteArray_Size(buf)
          char* base

      PyByteArray_Resize(buf, old_size + length)
      # Take the data pointer only after the resize, then copy into the tail.
      base = PyByteArray_AsString(buf)
      memcpy(base + old_size, at, length)
  # Number of llhttp method ids covered by the lookup table below.
  DEF METHODS_COUNT = 46

  # Method names indexed by llhttp method id, decoded once at import time.
  # Built with a comprehension so no loop variable is left behind as a
  # module-level global.
  cdef list _http_method = [
      cparser.llhttp_method_name(<cparser.llhttp_method_t> method_id).decode('ascii')
      for method_id in range(METHODS_COUNT)
  ]


  cdef inline str http_method_str(int i):
      # Return the method name for llhttp method id ``i``.
      #
      # Ids outside ``[0, METHODS_COUNT)`` yield "<unknown>". The lower bound
      # is checked explicitly: a negative index would otherwise wrap around
      # and return a name from the end of the table.
      if 0 <= i < METHODS_COUNT:
          return <str>_http_method[i]
      return "<unknown>"
  cdef inline object find_header(bytes raw_header):
      # Map a raw header name to its entry in the ``headers`` table, or decode
      # it (utf-8 with surrogateescape) when the lookup reports no match.
      cdef:
          char *name_ptr
          Py_ssize_t name_len
          int pos

      PyBytes_AsStringAndSize(raw_header, &name_ptr, &name_len)
      pos = _find_header.find_header(name_ptr, name_len)
      if pos != -1:
          return headers[pos]
      return raw_header.decode('utf-8', 'surrogateescape')
  @cython.freelist(DEFAULT_FREELIST_SIZE)
  cdef class RawRequestMessage:
      """Parsed request head: method, target, version, headers and flags."""

      cdef readonly str method
      cdef readonly str path
      cdef readonly object version  # HttpVersion
      cdef readonly object headers  # CIMultiDict
      cdef readonly object raw_headers  # tuple
      cdef readonly object should_close
      cdef readonly object compression
      cdef readonly object upgrade
      cdef readonly object chunked
      cdef readonly object url  # yarl.URL

      def __init__(self, method, path, version, headers, raw_headers,
                   should_close, compression, upgrade, chunked, url):
          """Store every argument on the attribute of the same name."""
          self.url = url
          self.chunked = chunked
          self.upgrade = upgrade
          self.compression = compression
          self.should_close = should_close
          self.raw_headers = raw_headers
          self.headers = headers
          self.version = version
          self.path = path
          self.method = method

      def __repr__(self):
          """Return ``<RawRequestMessage(name=value, ...)>`` in field order."""
          fields = [
              ("method", self.method),
              ("path", self.path),
              ("version", self.version),
              ("headers", self.headers),
              ("raw_headers", self.raw_headers),
              ("should_close", self.should_close),
              ("compression", self.compression),
              ("upgrade", self.upgrade),
              ("chunked", self.chunked),
              ("url", self.url),
          ]
          rendered = ', '.join(
              "{}={!r}".format(label, value) for label, value in fields)
          return '<RawRequestMessage(' + rendered + ')>'

      def _replace(self, **dct):
          """Return a copy of this message with the given fields overridden.

          Keys in ``dct`` other than the ten field names are ignored.
          """
          cdef RawRequestMessage clone

          # Start from a field-for-field copy of this message ...
          clone = _new_request_message(
              self.method, self.path, self.version, self.headers,
              self.raw_headers, self.should_close, self.compression,
              self.upgrade, self.chunked, self.url,
          )

          # ... then apply each override that was actually supplied.
          if "method" in dct:
              clone.method = dct["method"]
          if "path" in dct:
              clone.path = dct["path"]
          if "version" in dct:
              clone.version = dct["version"]
          if "headers" in dct:
              clone.headers = dct["headers"]
          if "raw_headers" in dct:
              clone.raw_headers = dct["raw_headers"]
          if "should_close" in dct:
              clone.should_close = dct["should_close"]
          if "compression" in dct:
              clone.compression = dct["compression"]
          if "upgrade" in dct:
              clone.upgrade = dct["upgrade"]
          if "chunked" in dct:
              clone.chunked = dct["chunked"]
          if "url" in dct:
              clone.url = dct["url"]

          return clone
  cdef _new_request_message(str method,
                            str path,
                            object version,
                            object headers,
                            object raw_headers,
                            bint should_close,
                            object compression,
                            bint upgrade,
                            bint chunked,
                            object url):
      # Build a RawRequestMessage via __new__ (so __init__ is not called)
      # and fill in each field directly.
      cdef RawRequestMessage msg = RawRequestMessage.__new__(RawRequestMessage)

      # Request line
      msg.method = method
      msg.path = path
      msg.url = url
      msg.version = version

      # Headers
      msg.headers = headers
      msg.raw_headers = raw_headers

      # Flags derived from the headers
      msg.should_close = should_close
      msg.compression = compression
      msg.upgrade = upgrade
      msg.chunked = chunked

      return msg
  @cython.freelist(DEFAULT_FREELIST_SIZE)
  cdef class RawResponseMessage:
      """Parsed response head: version, status, reason, headers and flags."""

      cdef readonly object version  # HttpVersion
      cdef readonly int code
      cdef readonly str reason
      cdef readonly object headers  # CIMultiDict
      cdef readonly object raw_headers  # tuple
      cdef readonly object should_close
      cdef readonly object compression
      cdef readonly object upgrade
      cdef readonly object chunked

      def __init__(self, version, code, reason, headers, raw_headers,
                   should_close, compression, upgrade, chunked):
          """Store every argument on the attribute of the same name."""
          self.chunked = chunked
          self.upgrade = upgrade
          self.compression = compression
          self.should_close = should_close
          self.raw_headers = raw_headers
          self.headers = headers
          self.reason = reason
          self.code = code
          self.version = version

      def __repr__(self):
          """Return ``<RawResponseMessage(name=value, ...)>`` in field order."""
          fields = [
              ("version", self.version),
              ("code", self.code),
              ("reason", self.reason),
              ("headers", self.headers),
              ("raw_headers", self.raw_headers),
              ("should_close", self.should_close),
              ("compression", self.compression),
              ("upgrade", self.upgrade),
              ("chunked", self.chunked),
          ]
          rendered = ', '.join(
              "{}={!r}".format(label, value) for label, value in fields)
          return '<RawResponseMessage(' + rendered + ')>'
  cdef _new_response_message(object version,
                             int code,
                             str reason,
                             object headers,
                             object raw_headers,
                             bint should_close,
                             object compression,
                             bint upgrade,
                             bint chunked):
      # Build a RawResponseMessage via __new__ (so __init__ is not called)
      # and fill in each field directly.
      cdef RawResponseMessage msg = RawResponseMessage.__new__(RawResponseMessage)

      # Status line
      msg.version = version
      msg.code = code
      msg.reason = reason

      # Headers
      msg.headers = headers
      msg.raw_headers = raw_headers

      # Flags derived from the headers
      msg.should_close = should_close
      msg.compression = compression
      msg.upgrade = upgrade
      msg.chunked = chunked

      return msg
  @cython.internal
  cdef class HttpParser:
      """Common llhttp-driven parser state shared by the request and
      response parsers.

      The module-level ``cb_on_*`` functions are registered as llhttp
      callbacks in ``_init`` and call back into the ``_on_*`` methods here.
      """

      cdef:
          # llhttp parser and its callback table, allocated in __cinit__.
          cparser.llhttp_t* _cparser
          cparser.llhttp_settings_t* _csettings

          # Header currently being accumulated across callback invocations.
          bytes _raw_name
          object _name
          bytes _raw_value
          bint _has_value

          object _protocol
          object _loop
          object _timer

          size_t _max_line_size
          size_t _max_field_size
          size_t _max_headers
          bint _response_with_body
          bint _read_until_eof

          bint _started
          object _url
          bytearray _buf
          str _path
          str _reason
          list _headers
          list _raw_headers
          bint _upgraded
          list _messages
          object _payload
          bint _payload_error
          object _payload_exception
          object _last_error
          bint _auto_decompress
          int _limit

          str _content_encoding

          Py_buffer py_buf

      def __cinit__(self):
          self._cparser = <cparser.llhttp_t*> \
              PyMem_Malloc(sizeof(cparser.llhttp_t))
          if self._cparser is NULL:
              raise MemoryError()

          self._csettings = <cparser.llhttp_settings_t*> \
              PyMem_Malloc(sizeof(cparser.llhttp_settings_t))
          if self._csettings is NULL:
              raise MemoryError()

      def __dealloc__(self):
          PyMem_Free(self._cparser)
          PyMem_Free(self._csettings)

      cdef _init(
          self, cparser.llhttp_type mode,
          object protocol, object loop, int limit,
          object timer=None,
          size_t max_line_size=8190, size_t max_headers=32768,
          size_t max_field_size=8190, payload_exception=None,
          bint response_with_body=True, bint read_until_eof=False,
          bint auto_decompress=True,
      ):
          # Initialise llhttp for ``mode``, reset all per-connection state and
          # register the module-level callbacks.
          cparser.llhttp_settings_init(self._csettings)
          cparser.llhttp_init(self._cparser, mode, self._csettings)
          # Lets the C callbacks recover this object from the llhttp struct.
          self._cparser.data = <void*>self
          self._cparser.content_length = 0

          self._protocol = protocol
          self._loop = loop
          self._timer = timer

          self._buf = bytearray()
          self._payload = None
          self._payload_error = 0
          self._payload_exception = payload_exception
          self._messages = []

          self._raw_name = EMPTY_BYTES
          self._raw_value = EMPTY_BYTES
          self._has_value = False

          self._max_line_size = max_line_size
          self._max_headers = max_headers
          self._max_field_size = max_field_size
          self._response_with_body = response_with_body
          self._read_until_eof = read_until_eof
          self._upgraded = False
          self._auto_decompress = auto_decompress
          self._content_encoding = None

          self._csettings.on_url = cb_on_url
          self._csettings.on_status = cb_on_status
          self._csettings.on_header_field = cb_on_header_field
          self._csettings.on_header_value = cb_on_header_value
          self._csettings.on_headers_complete = cb_on_headers_complete
          self._csettings.on_body = cb_on_body
          self._csettings.on_message_begin = cb_on_message_begin
          self._csettings.on_message_complete = cb_on_message_complete
          self._csettings.on_chunk_header = cb_on_chunk_header
          self._csettings.on_chunk_complete = cb_on_chunk_complete

          self._last_error = None
          self._limit = limit

      cdef _process_header(self):
          # Flush the accumulated (name, value) pair, if any, into the decoded
          # and raw header lists, then reset the accumulators.
          cdef str value
          if self._raw_name is not EMPTY_BYTES:
              name = find_header(self._raw_name)
              value = self._raw_value.decode('utf-8', 'surrogateescape')

              self._headers.append((name, value))

              # Identity comparison against the hdrs constant.
              if name is CONTENT_ENCODING:
                  self._content_encoding = value

              self._has_value = False
              self._raw_headers.append((self._raw_name, self._raw_value))
              self._raw_name = EMPTY_BYTES
              self._raw_value = EMPTY_BYTES

      cdef _on_header_field(self, char* at, size_t length):
          # A name fragment arriving after a value means the previous header
          # is complete; flush it before accumulating the new name.
          if self._has_value:
              self._process_header()

          if self._raw_name is EMPTY_BYTES:
              self._raw_name = at[:length]
          else:
              self._raw_name += at[:length]

      cdef _on_header_value(self, char* at, size_t length):
          # Accumulate a (possibly partial) header value.
          if self._raw_value is EMPTY_BYTES:
              self._raw_value = at[:length]
          else:
              self._raw_value += at[:length]
          self._has_value = True

      cdef _on_headers_complete(self):
          # Build the Raw*Message for the finished header block, choose the
          # payload object and queue ``(message, payload)`` on _messages.
          self._process_header()

          should_close = not cparser.llhttp_should_keep_alive(self._cparser)
          upgrade = self._cparser.upgrade
          chunked = self._cparser.flags & cparser.F_CHUNKED

          raw_headers = tuple(self._raw_headers)
          headers = CIMultiDictProxy(CIMultiDict(self._headers))

          if self._cparser.type == cparser.HTTP_REQUEST:
              h_upg = headers.get("upgrade", "")
              allowed = upgrade and h_upg.isascii() and h_upg.lower() in ALLOWED_UPGRADES
              if allowed or self._cparser.method == cparser.HTTP_CONNECT:
                  self._upgraded = True
          else:
              if upgrade and self._cparser.status_code == 101:
                  self._upgraded = True

          # do not support old websocket spec
          if SEC_WEBSOCKET_KEY1 in headers:
              raise InvalidHeader(SEC_WEBSOCKET_KEY1)

          encoding = None
          enc = self._content_encoding
          if enc is not None:
              self._content_encoding = None
              if enc.isascii() and enc.lower() in {"gzip", "deflate", "br", "zstd"}:
                  encoding = enc

          if self._cparser.type == cparser.HTTP_REQUEST:
              method = http_method_str(self._cparser.method)
              msg = _new_request_message(
                  method, self._path,
                  self.http_version(), headers, raw_headers,
                  should_close, encoding, upgrade, chunked, self._url)
          else:
              msg = _new_response_message(
                  self.http_version(), self._cparser.status_code, self._reason,
                  headers, raw_headers, should_close, encoding,
                  upgrade, chunked)

          if (
              ULLONG_MAX > self._cparser.content_length > 0 or chunked or
              self._cparser.method == cparser.HTTP_CONNECT or
              (self._cparser.status_code >= 199 and
               self._cparser.content_length == 0 and
               self._read_until_eof)
          ):
              payload = StreamReader(
                  self._protocol, timer=self._timer, loop=self._loop,
                  limit=self._limit)
          else:
              payload = EMPTY_PAYLOAD

          self._payload = payload
          if encoding is not None and self._auto_decompress:
              # Body callbacks feed the wrapper; the queued payload below is
              # still the underlying stream.
              self._payload = DeflateBuffer(payload, encoding)

          if not self._response_with_body:
              payload = EMPTY_PAYLOAD

          self._messages.append((msg, payload))

      cdef _on_message_complete(self):
          self._payload.feed_eof()
          self._payload = None

      cdef _on_chunk_header(self):
          self._payload.begin_http_chunk_receiving()

      cdef _on_chunk_complete(self):
          self._payload.end_http_chunk_receiving()

      cdef object _on_status_complete(self):
          # Hook overridden by the request/response subclasses.
          pass

      cdef inline http_version(self):
          # Return the shared HttpVersion10/11 objects for 1.0 and 1.1 and a
          # fresh HttpVersion for anything else.
          cdef cparser.llhttp_t* parser = self._cparser

          if parser.http_major == 1:
              if parser.http_minor == 0:
                  return HttpVersion10
              elif parser.http_minor == 1:
                  return HttpVersion11

          return HttpVersion(parser.http_major, parser.http_minor)

      ### Public API ###

      def feed_eof(self):
          """Signal end of input.

          With a payload in progress: raises TransferEncodingError or
          ContentLengthError when the chunked / content-length flag is set,
          PayloadEncodingError when llhttp reports an error, otherwise
          forwards EOF to the payload. With no payload but a started message,
          completes the headers and returns the last queued message, if any.
          """
          cdef bytes desc

          if self._payload is not None:
              if self._cparser.flags & cparser.F_CHUNKED:
                  raise TransferEncodingError(
                      "Not enough data to satisfy transfer length header.")
              elif self._cparser.flags & cparser.F_CONTENT_LENGTH:
                  raise ContentLengthError(
                      "Not enough data to satisfy content length header.")
              elif cparser.llhttp_get_errno(self._cparser) != cparser.HPE_OK:
                  desc = cparser.llhttp_get_error_reason(self._cparser)
                  raise PayloadEncodingError(desc.decode('latin-1'))
              else:
                  self._payload.feed_eof()
          elif self._started:
              self._on_headers_complete()
              if self._messages:
                  return self._messages[-1][0]

      def feed_data(self, data):
          """Run ``data`` through llhttp.

          Returns ``(messages, upgraded, tail)``: the ``(message, payload)``
          pairs completed by this call, whether the parser is in upgraded
          mode, and (only when upgraded) the bytes of ``data`` that were not
          consumed as HTTP.

          Raises the exception recorded by a callback, or one built by
          ``parser_error_from_errno``, when llhttp reports an error and no
          payload error was recorded.
          """
          cdef:
              size_t data_len
              size_t nb
              Py_ssize_t err_off = 0
              cparser.llhttp_errno_t errno

          PyObject_GetBuffer(data, &self.py_buf, PyBUF_SIMPLE)
          data_len = <size_t>self.py_buf.len
          # Unless llhttp pauses for an upgrade, treat the whole input as
          # consumed; this keeps ``nb`` defined on every path that reads it.
          nb = data_len

          errno = cparser.llhttp_execute(
              self._cparser,
              <char*>self.py_buf.buf,
              data_len)

          # All pointer arithmetic against the buffer happens here, while the
          # buffer is still held.
          if errno is cparser.HPE_PAUSED_UPGRADE:
              cparser.llhttp_resume_after_upgrade(self._cparser)

              nb = cparser.llhttp_get_error_pos(self._cparser) - <char*>self.py_buf.buf
          elif errno != cparser.HPE_OK:
              err_off = cparser.llhttp_get_error_pos(self._cparser) - <char*>self.py_buf.buf

          PyBuffer_Release(&self.py_buf)

          if errno not in (cparser.HPE_OK, cparser.HPE_PAUSED_UPGRADE):
              if self._payload_error == 0:
                  if self._last_error is not None:
                      ex = self._last_error
                      self._last_error = None
                  else:
                      # Show the line containing the error with a caret under
                      # the offending byte. Slicing ``data`` by offset avoids
                      # reading through a raw pointer past the input.
                      before = data[:err_off]
                      after_b = bytes(data[err_off:]).split(b"\r\n", 1)[0]
                      before = before.rsplit(b"\r\n", 1)[-1]
                      data = before + after_b
                      pointer = " " * (len(repr(before))-1) + "^"
                      ex = parser_error_from_errno(self._cparser, data, pointer)
                  self._payload = None
                  raise ex

          if self._messages:
              messages = self._messages
              self._messages = []
          else:
              messages = ()

          if self._upgraded:
              return messages, True, data[nb:]
          else:
              return messages, False, b""

      def set_upgraded(self, val):
          """Force the upgraded flag reported by ``feed_data``."""
          self._upgraded = val
  cdef class HttpRequestParser(HttpParser):
      """HTTP request parser; also builds the request URL from the target."""

      def __init__(
          self, protocol, loop, int limit, timer=None,
          size_t max_line_size=8190, size_t max_headers=32768,
          size_t max_field_size=8190, payload_exception=None,
          bint response_with_body=True, bint read_until_eof=False,
          bint auto_decompress=True,
      ):
          self._init(cparser.HTTP_REQUEST, protocol, loop, limit, timer,
                     max_line_size, max_headers, max_field_size,
                     payload_exception, response_with_body, read_until_eof,
                     auto_decompress)

      cdef object _on_status_complete(self):
          # Decode the request target collected in _buf into _path and build
          # _url from it. The buffer is always cleared afterwards, so a later
          # call for the same message is a no-op.
          cdef int idx1, idx2
          if not self._buf:
              return
          self._path = self._buf.decode('utf-8', 'surrogateescape')
          try:
              idx3 = len(self._path)
              if self._cparser.method == cparser.HTTP_CONNECT:
                  # authority-form,
                  # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
                  self._url = URL.build(authority=self._path, encoded=True)
              elif idx3 > 1 and self._path[0] == '/':
                  # origin-form,
                  # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
                  idx1 = self._path.find("?")
                  if idx1 == -1:
                      query = ""
                      idx2 = self._path.find("#")
                      if idx2 == -1:
                          path = self._path
                          fragment = ""
                      else:
                          path = self._path[0: idx2]
                          fragment = self._path[idx2+1:]

                  else:
                      path = self._path[0:idx1]
                      # idx1 now points at the first character after "?".
                      idx1 += 1
                      # Search from that character itself so an empty query
                      # directly followed by a fragment ("/?#frag") is split
                      # correctly instead of swallowing "#frag" as the query.
                      idx2 = self._path.find("#", idx1)
                      if idx2 == -1:
                          query = self._path[idx1:]
                          fragment = ""
                      else:
                          query = self._path[idx1: idx2]
                          fragment = self._path[idx2+1:]

                  self._url = URL.build(
                      path=path,
                      query_string=query,
                      fragment=fragment,
                      encoded=True,
                  )
              else:
                  # absolute-form for proxy maybe,
                  # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
                  self._url = URL(self._path, encoded=True)
          finally:
              PyByteArray_Resize(self._buf, 0)
  cdef class HttpResponseParser(HttpParser):
      """HTTP response parser; collects the reason phrase of the status line."""

      def __init__(
          self, protocol, loop, int limit, timer=None,
          size_t max_line_size=8190, size_t max_headers=32768,
          size_t max_field_size=8190, payload_exception=None,
          bint response_with_body=True, bint read_until_eof=False,
          bint auto_decompress=True
      ):
          self._init(
              cparser.HTTP_RESPONSE, protocol, loop, limit, timer,
              max_line_size, max_headers, max_field_size,
              payload_exception, response_with_body, read_until_eof,
              auto_decompress,
          )
          # Use strict parsing on dev mode, so users are warned about broken servers.
          if DEBUG:
              return
          cparser.llhttp_set_lenient_headers(self._cparser, 1)
          cparser.llhttp_set_lenient_optional_cr_before_lf(self._cparser, 1)
          cparser.llhttp_set_lenient_spaces_after_chunk_size(self._cparser, 1)

      cdef object _on_status_complete(self):
          # Move the buffered reason phrase into _reason; with nothing
          # buffered, make sure _reason is at least an empty string.
          if not self._buf:
              self._reason = self._reason or ''
              return
          self._reason = self._buf.decode('utf-8', 'surrogateescape')
          PyByteArray_Resize(self._buf, 0)
  cdef int cb_on_message_begin(cparser.llhttp_t* parser) except -1:
      # llhttp callback: a new message starts, so reset per-message state.
      cdef HttpParser owner = <HttpParser>parser.data

      owner._started = True

      # Fresh header accumulators for this message.
      owner._headers = []
      owner._raw_headers = []

      # Drop any leftover start-line data.
      PyByteArray_Resize(owner._buf, 0)
      owner._path = None
      owner._reason = None

      return 0
  cdef int cb_on_url(cparser.llhttp_t* parser,
                     const char *at, size_t length) except -1:
      # llhttp callback: buffer a fragment of the request target. Any
      # exception is parked on the parser and reported to llhttp as -1.
      cdef HttpParser owner = <HttpParser>parser.data
      try:
          if length > owner._max_line_size:
              raise LineTooLong(
                  'Status line is too long', owner._max_line_size, length)
          extend(owner._buf, at, length)
      except BaseException as ex:
          owner._last_error = ex
          return -1
      return 0
  cdef int cb_on_status(cparser.llhttp_t* parser,
                        const char *at, size_t length) except -1:
      # llhttp callback: buffer a fragment of the response reason phrase. Any
      # exception is parked on the parser and reported to llhttp as -1.
      cdef HttpParser owner = <HttpParser>parser.data
      try:
          if length > owner._max_line_size:
              raise LineTooLong(
                  'Status line is too long', owner._max_line_size, length)
          extend(owner._buf, at, length)
      except BaseException as ex:
          owner._last_error = ex
          return -1
      return 0
  cdef int cb_on_header_field(cparser.llhttp_t* parser,
                              const char *at, size_t length) except -1:
      # llhttp callback: a fragment of a header name. Finalises the start
      # line first, then enforces the field-size limit on the accumulated
      # name before appending the fragment.
      cdef HttpParser owner = <HttpParser>parser.data
      cdef Py_ssize_t total
      try:
          owner._on_status_complete()
          total = len(owner._raw_name) + length
          if total > owner._max_field_size:
              raise LineTooLong(
                  'Header name is too long', owner._max_field_size, total)
          owner._on_header_field(at, length)
      except BaseException as ex:
          owner._last_error = ex
          return -1
      return 0
  cdef int cb_on_header_value(cparser.llhttp_t* parser,
                              const char *at, size_t length) except -1:
      # llhttp callback: a fragment of a header value. Enforces the
      # field-size limit on the accumulated value before appending.
      cdef HttpParser owner = <HttpParser>parser.data
      cdef Py_ssize_t total
      try:
          total = len(owner._raw_value) + length
          if total > owner._max_field_size:
              raise LineTooLong(
                  'Header value is too long', owner._max_field_size, total)
          owner._on_header_value(at, length)
      except BaseException as ex:
          owner._last_error = ex
          return -1
      return 0
  cdef int cb_on_headers_complete(cparser.llhttp_t* parser) except -1:
      # llhttp callback: header block finished. Returns -1 on failure, 2 when
      # the connection is upgraded or the method is CONNECT, otherwise 0.
      cdef HttpParser owner = <HttpParser>parser.data
      try:
          owner._on_status_complete()
          owner._on_headers_complete()
      except BaseException as exc:
          owner._last_error = exc
          return -1

      if owner._upgraded or owner._cparser.method == cparser.HTTP_CONNECT:
          return 2
      return 0
  cdef int cb_on_body(cparser.llhttp_t* parser,
                      const char *at, size_t length) except -1:
      # llhttp callback: hand a body fragment to the current payload. On
      # failure the exception is set on the payload (wrapped by the
      # configured payload_exception, if any) and the error is flagged.
      cdef HttpParser owner = <HttpParser>parser.data
      cdef bytes chunk = at[:length]
      try:
          owner._payload.feed_data(chunk, length)
      except BaseException as underlying_exc:
          if owner._payload_exception is None:
              reraised_exc = underlying_exc
          else:
              reraised_exc = owner._payload_exception(str(underlying_exc))

          set_exception(owner._payload, reraised_exc, underlying_exc)

          owner._payload_error = 1
          return -1
      return 0
  cdef int cb_on_message_complete(cparser.llhttp_t* parser) except -1:
      # llhttp callback: message finished. Clears the started flag and
      # completes the payload; failures are parked on the parser.
      cdef HttpParser owner = <HttpParser>parser.data
      try:
          owner._started = False
          owner._on_message_complete()
      except BaseException as exc:
          owner._last_error = exc
          return -1
      return 0
  cdef int cb_on_chunk_header(cparser.llhttp_t* parser) except -1:
      # llhttp callback: a chunk header was parsed; forward to the parser
      # object and park any exception on it.
      cdef HttpParser owner = <HttpParser>parser.data
      try:
          owner._on_chunk_header()
      except BaseException as exc:
          owner._last_error = exc
          return -1
      return 0
  cdef int cb_on_chunk_complete(cparser.llhttp_t* parser) except -1:
      # llhttp callback: a chunk finished; forward to the parser object and
      # park any exception on it.
      cdef HttpParser owner = <HttpParser>parser.data
      try:
          owner._on_chunk_complete()
      except BaseException as exc:
          owner._last_error = exc
          return -1
      return 0
  cdef parser_error_from_errno(cparser.llhttp_t* parser, data, pointer):
      # Build (not raise) the exception matching llhttp's current error code.
      # The message contains llhttp's reason text, repr(data) and the caret
      # line in ``pointer``.
      cdef cparser.llhttp_errno_t code = cparser.llhttp_get_errno(parser)
      cdef bytes reason = cparser.llhttp_get_error_reason(parser)

      err_msg = "{}:\n\n {!r}\n {}".format(reason.decode("latin-1"), data, pointer)

      # Codes with a dedicated exception type.
      if code == cparser.HPE_INVALID_METHOD:
          return BadHttpMethod(error=err_msg)

      if code == cparser.HPE_INVALID_URL:
          return InvalidURLError(err_msg)

      if code in (cparser.HPE_INVALID_STATUS,
                  cparser.HPE_INVALID_VERSION):
          return BadStatusLine(error=err_msg)

      # Callback failures and malformed-message codes.
      if code in (cparser.HPE_CB_MESSAGE_BEGIN,
                  cparser.HPE_CB_HEADERS_COMPLETE,
                  cparser.HPE_CB_MESSAGE_COMPLETE,
                  cparser.HPE_CB_CHUNK_HEADER,
                  cparser.HPE_CB_CHUNK_COMPLETE,
                  cparser.HPE_INVALID_CONSTANT,
                  cparser.HPE_INVALID_HEADER_TOKEN,
                  cparser.HPE_INVALID_CONTENT_LENGTH,
                  cparser.HPE_INVALID_CHUNK_SIZE,
                  cparser.HPE_INVALID_EOF_STATE,
                  cparser.HPE_INVALID_TRANSFER_ENCODING):
          return BadHttpMessage(err_msg)

      # Every other code maps to the generic error as well.
      return BadHttpMessage(err_msg)