help.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. import binascii
  2. import ctypes
  3. import hashlib
  4. import json
  5. import random
  6. import re
  7. import string
  8. import time
  9. import urllib.parse
  10. from xml.etree import ElementTree
  11. import requests
  12. def sign(uri, data=None, ctime=None, a1="", b1=""):
  13. """
  14. takes in a URI (uniform resource identifier), an optional data dictionary, and an optional ctime parameter. It returns a dictionary containing two keys: "x-s" and "x-t".
  15. """
  16. def h(n):
  17. m = ""
  18. d = "A4NjFqYu5wPHsO0XTdDgMa2r1ZQocVte9UJBvk6/7=yRnhISGKblCWi+LpfE8xzm3"
  19. for i in range(0, 32, 3):
  20. o = ord(n[i])
  21. g = ord(n[i + 1]) if i + 1 < 32 else 0
  22. h = ord(n[i + 2]) if i + 2 < 32 else 0
  23. x = ((o & 3) << 4) | (g >> 4)
  24. p = ((15 & g) << 2) | (h >> 6)
  25. v = o >> 2
  26. b = h & 63 if h else 64
  27. if not g:
  28. p = b = 64
  29. m += d[v] + d[x] + d[p] + d[b]
  30. return m
  31. v = int(round(time.time() * 1000) if not ctime else ctime)
  32. raw_str = f"{v}test{uri}{json.dumps(data, separators=(',', ':'), ensure_ascii=False) if isinstance(data, dict) else ''}"
  33. md5_str = hashlib.md5(raw_str.encode('utf-8')).hexdigest()
  34. x_s = h(md5_str)
  35. x_t = str(v)
  36. common = {
  37. "s0": 5, # getPlatformCode
  38. "s1": "",
  39. "x0": "1", # localStorage.getItem("b1b1")
  40. "x1": "3.2.0", # version
  41. "x2": "Windows",
  42. "x3": "xhs-pc-web",
  43. "x4": "2.3.1",
  44. "x5": a1, # cookie of a1
  45. "x6": x_t,
  46. "x7": x_s,
  47. "x8": b1, # localStorage.getItem("b1")
  48. "x9": mrc(x_t + x_s),
  49. "x10": 1, # getSigCount
  50. }
  51. encodeStr = encodeUtf8(json.dumps(common, separators=(',', ':')))
  52. x_s_common = b64Encode(encodeStr)
  53. return {
  54. "x-s": x_s,
  55. "x-t": x_t,
  56. "x-s-common": x_s_common,
  57. }
  58. def get_a1_and_web_id():
  59. """generate a1 and webid cookie str, the first return value is a1, second is webId
  60. for example: a1, web_id = get_a1_and_web_id()
  61. """
  62. def random_str(length):
  63. alphabet = string.ascii_letters + string.digits
  64. return ''.join(random.choice(alphabet) for _ in range(length))
  65. d = hex(int(time.time() * 1000))[2:] + random_str(30) + "5" + "0" + "000"
  66. g = (d + str(binascii.crc32(str(d).encode('utf-8'))))[:52]
  67. return g, hashlib.md5(g.encode('utf-8')).hexdigest()
  68. img_cdns = [
  69. "https://sns-img-qc.xhscdn.com",
  70. "https://sns-img-hw.xhscdn.com",
  71. "https://sns-img-bd.xhscdn.com",
  72. "https://sns-img-qn.xhscdn.com",
  73. ]
  74. def get_img_url_by_trace_id(trace_id: str, format: str = "png"):
  75. return f"{random.choice(img_cdns)}/{trace_id}?imageView2/format/{format}"
  76. def get_img_urls_by_trace_id(trace_id: str, format: str = "png"):
  77. return [f"{cdn}/{trace_id}?imageView2/format/{format}" for cdn in img_cdns]
  78. def get_trace_id(img_url: str):
  79. trace_id = img_url.split("/")[-1].split("!")[0]
  80. if "spectrum" in img_url:
  81. return "spectrum/" + trace_id
  82. return trace_id
  83. def get_imgs_url_from_note(note) -> list:
  84. """the return type is [img1_url, img2_url, ...]"""
  85. imgs = note["image_list"]
  86. if not len(imgs):
  87. return []
  88. return [get_img_url_by_trace_id(get_trace_id(img["info_list"][0]["url"])) for img in imgs]
  89. def get_imgs_urls_from_note(note) -> list:
  90. """the return type is [[img1_url1, img1_url2, img1_url3], [img2_url, img2_url2, img2_url3], ...]"""
  91. imgs = note["image_list"]
  92. if not len(imgs):
  93. return []
  94. return [get_img_urls_by_trace_id(img["trace_id"]) for img in imgs]
  95. video_cdns = [
  96. "https://sns-video-qc.xhscdn.com",
  97. "https://sns-video-hw.xhscdn.com",
  98. "https://sns-video-bd.xhscdn.com",
  99. "https://sns-video-qn.xhscdn.com",
  100. ]
  101. def get_video_url_from_note(note) -> str:
  102. if not note.get("video"):
  103. return ""
  104. origin_video_key = note['video']['consumer']['origin_video_key']
  105. return f"{random.choice(video_cdns)}/{origin_video_key}"
  106. def get_video_urls_from_note(note) -> list:
  107. if not note.get("video"):
  108. return []
  109. origin_video_key = note['video']['consumer']['origin_video_key']
  110. return [f"{cdn}/{origin_video_key}" for cdn in video_cdns]
  111. def download_file(url: str, filename: str):
  112. with requests.get(url, stream=True) as r:
  113. r.raise_for_status()
  114. with open(filename, 'wb') as f:
  115. for chunk in r.iter_content(chunk_size=8192):
  116. f.write(chunk)
  117. def get_valid_path_name(text):
  118. invalid_chars = '<>:"/\\|?*'
  119. return re.sub('[{}]'.format(re.escape(invalid_chars)), '_', text)
  120. def mrc(e):
  121. ie = [
  122. 0, 1996959894, 3993919788, 2567524794, 124634137, 1886057615, 3915621685,
  123. 2657392035, 249268274, 2044508324, 3772115230, 2547177864, 162941995,
  124. 2125561021, 3887607047, 2428444049, 498536548, 1789927666, 4089016648,
  125. 2227061214, 450548861, 1843258603, 4107580753, 2211677639, 325883990,
  126. 1684777152, 4251122042, 2321926636, 335633487, 1661365465, 4195302755,
  127. 2366115317, 997073096, 1281953886, 3579855332, 2724688242, 1006888145,
  128. 1258607687, 3524101629, 2768942443, 901097722, 1119000684, 3686517206,
  129. 2898065728, 853044451, 1172266101, 3705015759, 2882616665, 651767980,
  130. 1373503546, 3369554304, 3218104598, 565507253, 1454621731, 3485111705,
  131. 3099436303, 671266974, 1594198024, 3322730930, 2970347812, 795835527,
  132. 1483230225, 3244367275, 3060149565, 1994146192, 31158534, 2563907772,
  133. 4023717930, 1907459465, 112637215, 2680153253, 3904427059, 2013776290,
  134. 251722036, 2517215374, 3775830040, 2137656763, 141376813, 2439277719,
  135. 3865271297, 1802195444, 476864866, 2238001368, 4066508878, 1812370925,
  136. 453092731, 2181625025, 4111451223, 1706088902, 314042704, 2344532202,
  137. 4240017532, 1658658271, 366619977, 2362670323, 4224994405, 1303535960,
  138. 984961486, 2747007092, 3569037538, 1256170817, 1037604311, 2765210733,
  139. 3554079995, 1131014506, 879679996, 2909243462, 3663771856, 1141124467,
  140. 855842277, 2852801631, 3708648649, 1342533948, 654459306, 3188396048,
  141. 3373015174, 1466479909, 544179635, 3110523913, 3462522015, 1591671054,
  142. 702138776, 2966460450, 3352799412, 1504918807, 783551873, 3082640443,
  143. 3233442989, 3988292384, 2596254646, 62317068, 1957810842, 3939845945,
  144. 2647816111, 81470997, 1943803523, 3814918930, 2489596804, 225274430,
  145. 2053790376, 3826175755, 2466906013, 167816743, 2097651377, 4027552580,
  146. 2265490386, 503444072, 1762050814, 4150417245, 2154129355, 426522225,
  147. 1852507879, 4275313526, 2312317920, 282753626, 1742555852, 4189708143,
  148. 2394877945, 397917763, 1622183637, 3604390888, 2714866558, 953729732,
  149. 1340076626, 3518719985, 2797360999, 1068828381, 1219638859, 3624741850,
  150. 2936675148, 906185462, 1090812512, 3747672003, 2825379669, 829329135,
  151. 1181335161, 3412177804, 3160834842, 628085408, 1382605366, 3423369109,
  152. 3138078467, 570562233, 1426400815, 3317316542, 2998733608, 733239954,
  153. 1555261956, 3268935591, 3050360625, 752459403, 1541320221, 2607071920,
  154. 3965973030, 1969922972, 40735498, 2617837225, 3943577151, 1913087877,
  155. 83908371, 2512341634, 3803740692, 2075208622, 213261112, 2463272603,
  156. 3855990285, 2094854071, 198958881, 2262029012, 4057260610, 1759359992,
  157. 534414190, 2176718541, 4139329115, 1873836001, 414664567, 2282248934,
  158. 4279200368, 1711684554, 285281116, 2405801727, 4167216745, 1634467795,
  159. 376229701, 2685067896, 3608007406, 1308918612, 956543938, 2808555105,
  160. 3495958263, 1231636301, 1047427035, 2932959818, 3654703836, 1088359270,
  161. 936918000, 2847714899, 3736837829, 1202900863, 817233897, 3183342108,
  162. 3401237130, 1404277552, 615818150, 3134207493, 3453421203, 1423857449,
  163. 601450431, 3009837614, 3294710456, 1567103746, 711928724, 3020668471,
  164. 3272380065, 1510334235, 755167117,
  165. ]
  166. o = -1
  167. def right_without_sign(num, bit=0) -> int:
  168. val = ctypes.c_uint32(num).value >> bit
  169. MAX32INT = 4294967295
  170. return (val + (MAX32INT + 1)) % (2 * (MAX32INT + 1)) - MAX32INT - 1
  171. for n in range(57):
  172. o = ie[(o & 255) ^ ord(e[n])] ^ right_without_sign(o, 8)
  173. return o ^ -1 ^ 3988292384
  174. lookup = [
  175. "Z",
  176. "m",
  177. "s",
  178. "e",
  179. "r",
  180. "b",
  181. "B",
  182. "o",
  183. "H",
  184. "Q",
  185. "t",
  186. "N",
  187. "P",
  188. "+",
  189. "w",
  190. "O",
  191. "c",
  192. "z",
  193. "a",
  194. "/",
  195. "L",
  196. "p",
  197. "n",
  198. "g",
  199. "G",
  200. "8",
  201. "y",
  202. "J",
  203. "q",
  204. "4",
  205. "2",
  206. "K",
  207. "W",
  208. "Y",
  209. "j",
  210. "0",
  211. "D",
  212. "S",
  213. "f",
  214. "d",
  215. "i",
  216. "k",
  217. "x",
  218. "3",
  219. "V",
  220. "T",
  221. "1",
  222. "6",
  223. "I",
  224. "l",
  225. "U",
  226. "A",
  227. "F",
  228. "M",
  229. "9",
  230. "7",
  231. "h",
  232. "E",
  233. "C",
  234. "v",
  235. "u",
  236. "R",
  237. "X",
  238. "5",
  239. ]
  240. def tripletToBase64(e):
  241. return (
  242. lookup[63 & (e >> 18)] + lookup[63 & (e >> 12)] + lookup[(e >> 6) & 63] + lookup[e & 63]
  243. )
  244. def encodeChunk(e, t, r):
  245. m = []
  246. for b in range(t, r, 3):
  247. n = (16711680 & (e[b] << 16)) + \
  248. ((e[b + 1] << 8) & 65280) + (e[b + 2] & 255)
  249. m.append(tripletToBase64(n))
  250. return ''.join(m)
  251. def b64Encode(e):
  252. P = len(e)
  253. W = P % 3
  254. U = []
  255. z = 16383
  256. H = 0
  257. Z = P - W
  258. while H < Z:
  259. U.append(encodeChunk(e, H, Z if H + z > Z else H + z))
  260. H += z
  261. if 1 == W:
  262. F = e[P - 1]
  263. U.append(lookup[F >> 2] + lookup[(F << 4) & 63] + "==")
  264. elif 2 == W:
  265. F = (e[P - 2] << 8) + e[P - 1]
  266. U.append(lookup[F >> 10] + lookup[63 & (F >> 4)] + lookup[(F << 2) & 63] + "=")
  267. return "".join(U)
  268. def encodeUtf8(e):
  269. b = []
  270. m = urllib.parse.quote(e, safe='~()*!.\'')
  271. w = 0
  272. while w < len(m):
  273. T = m[w]
  274. if T == "%":
  275. E = m[w + 1] + m[w + 2]
  276. S = int(E, 16)
  277. b.append(S)
  278. w += 2
  279. else:
  280. b.append(ord(T[0]))
  281. w += 1
  282. return b
  283. def base36encode(number, alphabet='0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'):
  284. """Converts an integer to a base36 string."""
  285. if not isinstance(number, int):
  286. raise TypeError('number must be an integer')
  287. base36 = ''
  288. sign = ''
  289. if number < 0:
  290. sign = '-'
  291. number = -number
  292. if 0 <= number < len(alphabet):
  293. return sign + alphabet[number]
  294. while number != 0:
  295. number, i = divmod(number, len(alphabet))
  296. base36 = alphabet[i] + base36
  297. return sign + base36
  298. def base36decode(number):
  299. return int(number, 36)
  300. def xml_to_dict(element):
  301. result = {}
  302. for child in element:
  303. if child:
  304. child_dict = xml_to_dict(child)
  305. if child.tag in result:
  306. if type(result[child.tag]) is list:
  307. result[child.tag].append(child_dict)
  308. else:
  309. result[child.tag] = [result[child.tag], child_dict]
  310. else:
  311. result[child.tag] = child_dict
  312. else:
  313. result[child.tag] = child.text
  314. return result
  315. def parse_xml(xml_string):
  316. root = ElementTree.fromstring(xml_string)
  317. return xml_to_dict(root)
  318. def get_search_id():
  319. e = int(time.time() * 1000) << 64
  320. t = int(random.uniform(0, 2147483646))
  321. return base36encode((e + t))
  322. def cookie_str_to_cookie_dict(cookie_str: str):
  323. cookie_blocks = [cookie_block.split("=")
  324. for cookie_block in cookie_str.split(";") if cookie_block]
  325. return {cookie[0].strip(): cookie[1].strip() for cookie in cookie_blocks}
  326. def cookie_jar_to_cookie_str(cookie_jar):
  327. cookie_dict = requests.utils.dict_from_cookiejar(cookie_jar)
  328. return ";".join([f"{key}={value}" for key, value in cookie_dict.items()])
  329. def update_session_cookies_from_cookie(session: requests.Session, cookie: str):
  330. cookie_dict = cookie_str_to_cookie_dict(cookie) if cookie else {}
  331. if "a1" not in cookie_dict or "webId" not in cookie_dict:
  332. # a1, web_id = get_a1_and_web_id()
  333. cookie_dict |= {"a1": "187d2defea8dz1fgwydnci40kw265ikh9fsxn66qs50000726043",
  334. "webId": "ba57f42593b9e55840a289fa0b755374"}
  335. if "gid" not in cookie_dict:
  336. cookie_dict |= {
  337. "gid.sign": "PSF1M3U6EBC/Jv6eGddPbmsWzLI=",
  338. "gid": "yYWfJfi820jSyYWfJfdidiKK0YfuyikEvfISMAM348TEJC28K23TxI888WJK84q8S4WfY2Sy"
  339. }
  340. new_cookies = requests.utils.cookiejar_from_dict(cookie_dict)
  341. session.cookies = new_cookies