util.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584
  1. import math
  2. import re
  3. from qrcode import LUT, base, exceptions
  4. from qrcode.base import RSBlock
  5. # QR encoding modes.
  6. MODE_NUMBER = 1 << 0
  7. MODE_ALPHA_NUM = 1 << 1
  8. MODE_8BIT_BYTE = 1 << 2
  9. MODE_KANJI = 1 << 3
  10. # Encoding mode sizes.
  11. MODE_SIZE_SMALL = {
  12. MODE_NUMBER: 10,
  13. MODE_ALPHA_NUM: 9,
  14. MODE_8BIT_BYTE: 8,
  15. MODE_KANJI: 8,
  16. }
  17. MODE_SIZE_MEDIUM = {
  18. MODE_NUMBER: 12,
  19. MODE_ALPHA_NUM: 11,
  20. MODE_8BIT_BYTE: 16,
  21. MODE_KANJI: 10,
  22. }
  23. MODE_SIZE_LARGE = {
  24. MODE_NUMBER: 14,
  25. MODE_ALPHA_NUM: 13,
  26. MODE_8BIT_BYTE: 16,
  27. MODE_KANJI: 12,
  28. }
  29. ALPHA_NUM = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:"
  30. RE_ALPHA_NUM = re.compile(b"^[" + re.escape(ALPHA_NUM) + rb"]*\Z")
  31. # The number of bits for numeric delimited data lengths.
  32. NUMBER_LENGTH = {3: 10, 2: 7, 1: 4}
  33. PATTERN_POSITION_TABLE = [
  34. [],
  35. [6, 18],
  36. [6, 22],
  37. [6, 26],
  38. [6, 30],
  39. [6, 34],
  40. [6, 22, 38],
  41. [6, 24, 42],
  42. [6, 26, 46],
  43. [6, 28, 50],
  44. [6, 30, 54],
  45. [6, 32, 58],
  46. [6, 34, 62],
  47. [6, 26, 46, 66],
  48. [6, 26, 48, 70],
  49. [6, 26, 50, 74],
  50. [6, 30, 54, 78],
  51. [6, 30, 56, 82],
  52. [6, 30, 58, 86],
  53. [6, 34, 62, 90],
  54. [6, 28, 50, 72, 94],
  55. [6, 26, 50, 74, 98],
  56. [6, 30, 54, 78, 102],
  57. [6, 28, 54, 80, 106],
  58. [6, 32, 58, 84, 110],
  59. [6, 30, 58, 86, 114],
  60. [6, 34, 62, 90, 118],
  61. [6, 26, 50, 74, 98, 122],
  62. [6, 30, 54, 78, 102, 126],
  63. [6, 26, 52, 78, 104, 130],
  64. [6, 30, 56, 82, 108, 134],
  65. [6, 34, 60, 86, 112, 138],
  66. [6, 30, 58, 86, 114, 142],
  67. [6, 34, 62, 90, 118, 146],
  68. [6, 30, 54, 78, 102, 126, 150],
  69. [6, 24, 50, 76, 102, 128, 154],
  70. [6, 28, 54, 80, 106, 132, 158],
  71. [6, 32, 58, 84, 110, 136, 162],
  72. [6, 26, 54, 82, 110, 138, 166],
  73. [6, 30, 58, 86, 114, 142, 170],
  74. ]
  75. G15 = (1 << 10) | (1 << 8) | (1 << 5) | (1 << 4) | (1 << 2) | (1 << 1) | (1 << 0)
  76. G18 = (
  77. (1 << 12)
  78. | (1 << 11)
  79. | (1 << 10)
  80. | (1 << 9)
  81. | (1 << 8)
  82. | (1 << 5)
  83. | (1 << 2)
  84. | (1 << 0)
  85. )
  86. G15_MASK = (1 << 14) | (1 << 12) | (1 << 10) | (1 << 4) | (1 << 1)
  87. PAD0 = 0xEC
  88. PAD1 = 0x11
  89. # Precompute bit count limits, indexed by error correction level and code size
  90. def _data_count(block):
  91. return block.data_count
  92. BIT_LIMIT_TABLE = [
  93. [0]
  94. + [
  95. 8 * sum(map(_data_count, base.rs_blocks(version, error_correction)))
  96. for version in range(1, 41)
  97. ]
  98. for error_correction in range(4)
  99. ]
  100. def BCH_type_info(data):
  101. d = data << 10
  102. while BCH_digit(d) - BCH_digit(G15) >= 0:
  103. d ^= G15 << (BCH_digit(d) - BCH_digit(G15))
  104. return ((data << 10) | d) ^ G15_MASK
  105. def BCH_type_number(data):
  106. d = data << 12
  107. while BCH_digit(d) - BCH_digit(G18) >= 0:
  108. d ^= G18 << (BCH_digit(d) - BCH_digit(G18))
  109. return (data << 12) | d
  110. def BCH_digit(data):
  111. digit = 0
  112. while data != 0:
  113. digit += 1
  114. data >>= 1
  115. return digit
  116. def pattern_position(version):
  117. return PATTERN_POSITION_TABLE[version - 1]
  118. def mask_func(pattern):
  119. """
  120. Return the mask function for the given mask pattern.
  121. """
  122. if pattern == 0: # 000
  123. return lambda i, j: (i + j) % 2 == 0
  124. if pattern == 1: # 001
  125. return lambda i, j: i % 2 == 0
  126. if pattern == 2: # 010
  127. return lambda i, j: j % 3 == 0
  128. if pattern == 3: # 011
  129. return lambda i, j: (i + j) % 3 == 0
  130. if pattern == 4: # 100
  131. return lambda i, j: (math.floor(i / 2) + math.floor(j / 3)) % 2 == 0
  132. if pattern == 5: # 101
  133. return lambda i, j: (i * j) % 2 + (i * j) % 3 == 0
  134. if pattern == 6: # 110
  135. return lambda i, j: ((i * j) % 2 + (i * j) % 3) % 2 == 0
  136. if pattern == 7: # 111
  137. return lambda i, j: ((i * j) % 3 + (i + j) % 2) % 2 == 0
  138. raise TypeError("Bad mask pattern: " + pattern) # pragma: no cover
  139. def mode_sizes_for_version(version):
  140. if version < 10:
  141. return MODE_SIZE_SMALL
  142. elif version < 27:
  143. return MODE_SIZE_MEDIUM
  144. else:
  145. return MODE_SIZE_LARGE
  146. def length_in_bits(mode, version):
  147. if mode not in (MODE_NUMBER, MODE_ALPHA_NUM, MODE_8BIT_BYTE, MODE_KANJI):
  148. raise TypeError(f"Invalid mode ({mode})") # pragma: no cover
  149. check_version(version)
  150. return mode_sizes_for_version(version)[mode]
  151. def check_version(version):
  152. if version < 1 or version > 40:
  153. raise ValueError(f"Invalid version (was {version}, expected 1 to 40)")
  154. def lost_point(modules):
  155. modules_count = len(modules)
  156. lost_point = 0
  157. lost_point = _lost_point_level1(modules, modules_count)
  158. lost_point += _lost_point_level2(modules, modules_count)
  159. lost_point += _lost_point_level3(modules, modules_count)
  160. lost_point += _lost_point_level4(modules, modules_count)
  161. return lost_point
  162. def _lost_point_level1(modules, modules_count):
  163. lost_point = 0
  164. modules_range = range(modules_count)
  165. container = [0] * (modules_count + 1)
  166. for row in modules_range:
  167. this_row = modules[row]
  168. previous_color = this_row[0]
  169. length = 0
  170. for col in modules_range:
  171. if this_row[col] == previous_color:
  172. length += 1
  173. else:
  174. if length >= 5:
  175. container[length] += 1
  176. length = 1
  177. previous_color = this_row[col]
  178. if length >= 5:
  179. container[length] += 1
  180. for col in modules_range:
  181. previous_color = modules[0][col]
  182. length = 0
  183. for row in modules_range:
  184. if modules[row][col] == previous_color:
  185. length += 1
  186. else:
  187. if length >= 5:
  188. container[length] += 1
  189. length = 1
  190. previous_color = modules[row][col]
  191. if length >= 5:
  192. container[length] += 1
  193. lost_point += sum(
  194. container[each_length] * (each_length - 2)
  195. for each_length in range(5, modules_count + 1)
  196. )
  197. return lost_point
  198. def _lost_point_level2(modules, modules_count):
  199. lost_point = 0
  200. modules_range = range(modules_count - 1)
  201. for row in modules_range:
  202. this_row = modules[row]
  203. next_row = modules[row + 1]
  204. # use iter() and next() to skip next four-block. e.g.
  205. # d a f if top-right a != b bottom-right,
  206. # c b e then both abcd and abef won't lost any point.
  207. modules_range_iter = iter(modules_range)
  208. for col in modules_range_iter:
  209. top_right = this_row[col + 1]
  210. if top_right != next_row[col + 1]:
  211. # reduce 33.3% of runtime via next().
  212. # None: raise nothing if there is no next item.
  213. next(modules_range_iter, None)
  214. elif top_right != this_row[col]:
  215. continue
  216. elif top_right != next_row[col]:
  217. continue
  218. else:
  219. lost_point += 3
  220. return lost_point
  221. def _lost_point_level3(modules, modules_count):
  222. # 1 : 1 : 3 : 1 : 1 ratio (dark:light:dark:light:dark) pattern in
  223. # row/column, preceded or followed by light area 4 modules wide. From ISOIEC.
  224. # pattern1: 10111010000
  225. # pattern2: 00001011101
  226. modules_range = range(modules_count)
  227. modules_range_short = range(modules_count - 10)
  228. lost_point = 0
  229. for row in modules_range:
  230. this_row = modules[row]
  231. modules_range_short_iter = iter(modules_range_short)
  232. col = 0
  233. for col in modules_range_short_iter:
  234. if (
  235. not this_row[col + 1]
  236. and this_row[col + 4]
  237. and not this_row[col + 5]
  238. and this_row[col + 6]
  239. and not this_row[col + 9]
  240. and (
  241. this_row[col + 0]
  242. and this_row[col + 2]
  243. and this_row[col + 3]
  244. and not this_row[col + 7]
  245. and not this_row[col + 8]
  246. and not this_row[col + 10]
  247. or not this_row[col + 0]
  248. and not this_row[col + 2]
  249. and not this_row[col + 3]
  250. and this_row[col + 7]
  251. and this_row[col + 8]
  252. and this_row[col + 10]
  253. )
  254. ):
  255. lost_point += 40
  256. # horspool algorithm.
  257. # if this_row[col + 10]:
  258. # pattern1 shift 4, pattern2 shift 2. So min=2.
  259. # else:
  260. # pattern1 shift 1, pattern2 shift 1. So min=1.
  261. if this_row[col + 10]:
  262. next(modules_range_short_iter, None)
  263. for col in modules_range:
  264. modules_range_short_iter = iter(modules_range_short)
  265. row = 0
  266. for row in modules_range_short_iter:
  267. if (
  268. not modules[row + 1][col]
  269. and modules[row + 4][col]
  270. and not modules[row + 5][col]
  271. and modules[row + 6][col]
  272. and not modules[row + 9][col]
  273. and (
  274. modules[row + 0][col]
  275. and modules[row + 2][col]
  276. and modules[row + 3][col]
  277. and not modules[row + 7][col]
  278. and not modules[row + 8][col]
  279. and not modules[row + 10][col]
  280. or not modules[row + 0][col]
  281. and not modules[row + 2][col]
  282. and not modules[row + 3][col]
  283. and modules[row + 7][col]
  284. and modules[row + 8][col]
  285. and modules[row + 10][col]
  286. )
  287. ):
  288. lost_point += 40
  289. if modules[row + 10][col]:
  290. next(modules_range_short_iter, None)
  291. return lost_point
  292. def _lost_point_level4(modules, modules_count):
  293. dark_count = sum(map(sum, modules))
  294. percent = float(dark_count) / (modules_count**2)
  295. # Every 5% departure from 50%, rating++
  296. rating = int(abs(percent * 100 - 50) / 5)
  297. return rating * 10
  298. def optimal_data_chunks(data, minimum=4):
  299. """
  300. An iterator returning QRData chunks optimized to the data content.
  301. :param minimum: The minimum number of bytes in a row to split as a chunk.
  302. """
  303. data = to_bytestring(data)
  304. num_pattern = rb"\d"
  305. alpha_pattern = b"[" + re.escape(ALPHA_NUM) + b"]"
  306. if len(data) <= minimum:
  307. num_pattern = re.compile(b"^" + num_pattern + b"+$")
  308. alpha_pattern = re.compile(b"^" + alpha_pattern + b"+$")
  309. else:
  310. re_repeat = b"{" + str(minimum).encode("ascii") + b",}"
  311. num_pattern = re.compile(num_pattern + re_repeat)
  312. alpha_pattern = re.compile(alpha_pattern + re_repeat)
  313. num_bits = _optimal_split(data, num_pattern)
  314. for is_num, chunk in num_bits:
  315. if is_num:
  316. yield QRData(chunk, mode=MODE_NUMBER, check_data=False)
  317. else:
  318. for is_alpha, sub_chunk in _optimal_split(chunk, alpha_pattern):
  319. mode = MODE_ALPHA_NUM if is_alpha else MODE_8BIT_BYTE
  320. yield QRData(sub_chunk, mode=mode, check_data=False)
  321. def _optimal_split(data, pattern):
  322. while data:
  323. match = re.search(pattern, data)
  324. if not match:
  325. break
  326. start, end = match.start(), match.end()
  327. if start:
  328. yield False, data[:start]
  329. yield True, data[start:end]
  330. data = data[end:]
  331. if data:
  332. yield False, data
  333. def to_bytestring(data):
  334. """
  335. Convert data to a (utf-8 encoded) byte-string if it isn't a byte-string
  336. already.
  337. """
  338. if not isinstance(data, bytes):
  339. data = str(data).encode("utf-8")
  340. return data
  341. def optimal_mode(data):
  342. """
  343. Calculate the optimal mode for this chunk of data.
  344. """
  345. if data.isdigit():
  346. return MODE_NUMBER
  347. if RE_ALPHA_NUM.match(data):
  348. return MODE_ALPHA_NUM
  349. return MODE_8BIT_BYTE
  350. class QRData:
  351. """
  352. Data held in a QR compatible format.
  353. Doesn't currently handle KANJI.
  354. """
  355. def __init__(self, data, mode=None, check_data=True):
  356. """
  357. If ``mode`` isn't provided, the most compact QR data type possible is
  358. chosen.
  359. """
  360. if check_data:
  361. data = to_bytestring(data)
  362. if mode is None:
  363. self.mode = optimal_mode(data)
  364. else:
  365. self.mode = mode
  366. if mode not in (MODE_NUMBER, MODE_ALPHA_NUM, MODE_8BIT_BYTE):
  367. raise TypeError(f"Invalid mode ({mode})") # pragma: no cover
  368. if check_data and mode < optimal_mode(data): # pragma: no cover
  369. raise ValueError(f"Provided data can not be represented in mode {mode}")
  370. self.data = data
  371. def __len__(self):
  372. return len(self.data)
  373. def write(self, buffer):
  374. if self.mode == MODE_NUMBER:
  375. for i in range(0, len(self.data), 3):
  376. chars = self.data[i : i + 3]
  377. bit_length = NUMBER_LENGTH[len(chars)]
  378. buffer.put(int(chars), bit_length)
  379. elif self.mode == MODE_ALPHA_NUM:
  380. for i in range(0, len(self.data), 2):
  381. chars = self.data[i : i + 2]
  382. if len(chars) > 1:
  383. buffer.put(
  384. ALPHA_NUM.find(chars[0]) * 45 + ALPHA_NUM.find(chars[1]), 11
  385. )
  386. else:
  387. buffer.put(ALPHA_NUM.find(chars), 6)
  388. else:
  389. # Iterating a bytestring in Python 3 returns an integer,
  390. # no need to ord().
  391. data = self.data
  392. for c in data:
  393. buffer.put(c, 8)
  394. def __repr__(self):
  395. return repr(self.data)
  396. class BitBuffer:
  397. def __init__(self):
  398. self.buffer: list[int] = []
  399. self.length = 0
  400. def __repr__(self):
  401. return ".".join([str(n) for n in self.buffer])
  402. def get(self, index):
  403. buf_index = math.floor(index / 8)
  404. return ((self.buffer[buf_index] >> (7 - index % 8)) & 1) == 1
  405. def put(self, num, length):
  406. for i in range(length):
  407. self.put_bit(((num >> (length - i - 1)) & 1) == 1)
  408. def __len__(self):
  409. return self.length
  410. def put_bit(self, bit):
  411. buf_index = self.length // 8
  412. if len(self.buffer) <= buf_index:
  413. self.buffer.append(0)
  414. if bit:
  415. self.buffer[buf_index] |= 0x80 >> (self.length % 8)
  416. self.length += 1
  417. def create_bytes(buffer: BitBuffer, rs_blocks: list[RSBlock]):
  418. offset = 0
  419. maxDcCount = 0
  420. maxEcCount = 0
  421. dcdata: list[list[int]] = []
  422. ecdata: list[list[int]] = []
  423. for rs_block in rs_blocks:
  424. dcCount = rs_block.data_count
  425. ecCount = rs_block.total_count - dcCount
  426. maxDcCount = max(maxDcCount, dcCount)
  427. maxEcCount = max(maxEcCount, ecCount)
  428. current_dc = [0xFF & buffer.buffer[i + offset] for i in range(dcCount)]
  429. offset += dcCount
  430. # Get error correction polynomial.
  431. if ecCount in LUT.rsPoly_LUT:
  432. rsPoly = base.Polynomial(LUT.rsPoly_LUT[ecCount], 0)
  433. else:
  434. rsPoly = base.Polynomial([1], 0)
  435. for i in range(ecCount):
  436. rsPoly = rsPoly * base.Polynomial([1, base.gexp(i)], 0)
  437. rawPoly = base.Polynomial(current_dc, len(rsPoly) - 1)
  438. modPoly = rawPoly % rsPoly
  439. current_ec = []
  440. mod_offset = len(modPoly) - ecCount
  441. for i in range(ecCount):
  442. modIndex = i + mod_offset
  443. current_ec.append(modPoly[modIndex] if (modIndex >= 0) else 0)
  444. dcdata.append(current_dc)
  445. ecdata.append(current_ec)
  446. data = []
  447. for i in range(maxDcCount):
  448. for dc in dcdata:
  449. if i < len(dc):
  450. data.append(dc[i])
  451. for i in range(maxEcCount):
  452. for ec in ecdata:
  453. if i < len(ec):
  454. data.append(ec[i])
  455. return data
  456. def create_data(version, error_correction, data_list):
  457. buffer = BitBuffer()
  458. for data in data_list:
  459. buffer.put(data.mode, 4)
  460. buffer.put(len(data), length_in_bits(data.mode, version))
  461. data.write(buffer)
  462. # Calculate the maximum number of bits for the given version.
  463. rs_blocks = base.rs_blocks(version, error_correction)
  464. bit_limit = sum(block.data_count * 8 for block in rs_blocks)
  465. if len(buffer) > bit_limit:
  466. raise exceptions.DataOverflowError(
  467. "Code length overflow. Data size (%s) > size available (%s)"
  468. % (len(buffer), bit_limit)
  469. )
  470. # Terminate the bits (add up to four 0s).
  471. for _ in range(min(bit_limit - len(buffer), 4)):
  472. buffer.put_bit(False)
  473. # Delimit the string into 8-bit words, padding with 0s if necessary.
  474. delimit = len(buffer) % 8
  475. if delimit:
  476. for _ in range(8 - delimit):
  477. buffer.put_bit(False)
  478. # Add special alternating padding bitstrings until buffer is full.
  479. bytes_to_fill = (bit_limit - len(buffer)) // 8
  480. for i in range(bytes_to_fill):
  481. if i % 2 == 0:
  482. buffer.put(PAD0, 8)
  483. else:
  484. buffer.put(PAD1, 8)
  485. return create_bytes(buffer, rs_blocks)