WeixinVideoWorkStatisticsImportService.ts 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. /**
  2. * 视频号:作品维度「作品列表 + 按天聚合数据」→ 导入 work_day_statistics
  3. *
  4. * 流程:
  5. * 1. 获取 works 表中 platform=weixin_video 的作品(platform_video_id 存的是 exportId)
  6. * 2. 调用 post_list 接口获取作品列表,通过 exportId 匹配得到 objectId
  7. * 3. 对每个作品调用 feed_aggreagate_data_by_tab_type,取「全部」tab 的按天数据
  8. * 4. 将 browse→播放、like→点赞、comment→评论 写入 work_day_statistics(follow=关注、fav/forward 暂不入库)
  9. */
  10. import crypto from 'crypto';
  11. import { AppDataSource, PlatformAccount, Work } from '../models/index.js';
  12. import { logger } from '../utils/logger.js';
  13. import { WorkDayStatisticsService } from './WorkDayStatisticsService.js';
  14. import { CookieManager } from '../automation/cookie.js';
  15. const POST_LIST_BASE =
  16. 'https://channels.weixin.qq.com/micro/statistic/cgi-bin/mmfinderassistant-bin/statistic/post_list';
  17. const FEED_AGGREGATE_BASE =
  18. 'https://channels.weixin.qq.com/micro/statistic/cgi-bin/mmfinderassistant-bin/statistic/feed_aggreagate_data_by_tab_type';
  19. /** 列表页 _pageUrl(与浏览器「数据统计-作品」列表一致) */
  20. const POST_LIST_PAGE_URL = 'https://channels.weixin.qq.com/micro/statistic/post';
  21. /** 详情页 _pageUrl(与浏览器 postDetail 一致,feed_aggreagate 用) */
  22. const POST_DETAIL_PAGE_URL = 'https://channels.weixin.qq.com/micro/statistic/postDetail';
  23. /** 生成随机 _rid(格式如 6982df69-ff6e46a5,8hex-8hex) */
  24. function generateRandomRid(): string {
  25. const a = crypto.randomBytes(4).toString('hex');
  26. const b = crypto.randomBytes(4).toString('hex');
  27. return `${a}-${b}`;
  28. }
  29. /**
  30. * 构建带 _aid、_rid、_pageUrl 的 URL。
  31. * 若传入 sessionAid/sessionRid 则优先使用(本账号 post_list 生成的,复用于 feed_aggreagate);
  32. * 否则读环境变量 WX_VIDEO_AID、WX_VIDEO_RID。
  33. */
  34. function buildUrlWithAidRid(
  35. base: string,
  36. pageUrl: string,
  37. sessionAid?: string,
  38. sessionRid?: string
  39. ): string {
  40. const aid = sessionAid ?? process.env.WX_VIDEO_AID?.trim() ?? '';
  41. const rid = sessionRid ?? process.env.WX_VIDEO_RID?.trim() ?? '';
  42. const params = new URLSearchParams();
  43. if (aid) params.set('_aid', aid);
  44. if (rid) params.set('_rid', rid);
  45. params.set('_pageUrl', pageUrl);
  46. const qs = params.toString();
  47. return qs ? `${base}?${qs}` : base;
  48. }
  49. function tryDecryptCookieData(cookieData: string | null): string | null {
  50. if (!cookieData) return null;
  51. const raw = cookieData.trim();
  52. if (!raw) return null;
  53. try {
  54. return CookieManager.decrypt(raw);
  55. } catch {
  56. return raw;
  57. }
  58. }
  59. /** 将账号 cookie_data 转为 HTTP Cookie 头字符串 */
  60. function getCookieHeaderString(cookieData: string | null): string {
  61. const raw = tryDecryptCookieData(cookieData);
  62. if (!raw) return '';
  63. const s = raw.trim();
  64. if (!s) return '';
  65. if (s.startsWith('[') || s.startsWith('{')) {
  66. try {
  67. const parsed = JSON.parse(s);
  68. const arr = Array.isArray(parsed) ? parsed : parsed?.cookies ?? [];
  69. if (!Array.isArray(arr)) return '';
  70. return arr
  71. .map((c: { name?: string; value?: string }) => {
  72. const name = String(c?.name ?? '').trim();
  73. const value = String(c?.value ?? '').trim();
  74. return name ? `${name}=${value}` : '';
  75. })
  76. .filter(Boolean)
  77. .join('; ');
  78. } catch {
  79. return s;
  80. }
  81. }
  82. return s;
  83. }
  84. /** 从 Cookie 字符串中解析 x-wechat-uin(可选) */
  85. function getXWechatUinFromCookie(cookieHeader: string): string | undefined {
  86. const match = cookieHeader.match(/\bwxuin=(\d+)/i);
  87. return match ? match[1] : undefined;
  88. }
  89. /** 从账号 account_id 得到 _log_finder_id(去掉 weixin_video_ 前缀,保证以 @finder 结尾) */
  90. function getLogFinderId(accountId: string | null): string {
  91. if (!accountId) return '';
  92. const s = String(accountId).trim();
  93. const prefix = 'weixin_video_';
  94. const id = s.startsWith(prefix) ? s.slice(prefix.length) : s;
  95. if (!id) return '';
  96. return id.endsWith('@finder') ? id : `${id}@finder`;
  97. }
  98. function buildPostListUrl(sessionAid?: string, sessionRid?: string): string {
  99. return buildUrlWithAidRid(POST_LIST_BASE, POST_LIST_PAGE_URL, sessionAid, sessionRid);
  100. }
  101. function buildFeedAggregateUrl(sessionAid?: string, sessionRid?: string): string {
  102. return buildUrlWithAidRid(FEED_AGGREGATE_BASE, POST_DETAIL_PAGE_URL, sessionAid, sessionRid);
  103. }
  104. /** 近30天到昨天:返回 [startTime, endTime] Unix 秒(中国时间 00:00:00 起算) */
  105. function getLast30DaysRange(): { startTime: number; endTime: number; startDate: Date; endDate: Date } {
  106. const now = new Date();
  107. const yesterday = new Date(now.getFullYear(), now.getMonth(), now.getDate() - 1);
  108. const startDate = new Date(yesterday.getFullYear(), yesterday.getMonth(), yesterday.getDate() - 30);
  109. startDate.setHours(0, 0, 0, 0);
  110. yesterday.setHours(23, 59, 59, 999);
  111. const startTime = Math.floor(startDate.getTime() / 1000);
  112. const endTime = Math.floor(yesterday.getTime() / 1000);
  113. const endDateNorm = new Date(yesterday.getFullYear(), yesterday.getMonth(), yesterday.getDate());
  114. endDateNorm.setHours(0, 0, 0, 0);
  115. return { startTime, endTime, startDate, endDate: endDateNorm };
  116. }
  117. function toInt(val: unknown, defaultVal = 0): number {
  118. if (typeof val === 'number') return Number.isFinite(val) ? Math.round(val) : defaultVal;
  119. if (typeof val === 'string') {
  120. const n = parseInt(val, 10);
  121. return Number.isFinite(n) ? n : defaultVal;
  122. }
  123. return defaultVal;
  124. }
  125. interface PostListItem {
  126. objectId?: string;
  127. exportId?: string;
  128. }
  129. interface FeedAggregateDataByTabType {
  130. tabType?: number;
  131. tabTypeName?: string;
  132. data?: {
  133. browse?: string[];
  134. like?: string[];
  135. comment?: string[];
  136. forward?: string[];
  137. fav?: string[];
  138. follow?: string[];
  139. };
  140. }
  141. export class WeixinVideoWorkStatisticsImportService {
  142. private accountRepository = AppDataSource.getRepository(PlatformAccount);
  143. private workRepository = AppDataSource.getRepository(Work);
  144. private workDayStatisticsService = new WorkDayStatisticsService();
  145. static async runDailyImport(): Promise<void> {
  146. const svc = new WeixinVideoWorkStatisticsImportService();
  147. await svc.runDailyImportForAllWeixinVideoAccounts();
  148. }
  149. async runDailyImportForAllWeixinVideoAccounts(): Promise<void> {
  150. const accounts = await this.accountRepository.find({
  151. where: { platform: 'weixin_video' as any },
  152. });
  153. logger.info(`[WX WorkStats] Start import for ${accounts.length} weixin_video accounts`);
  154. for (const account of accounts) {
  155. try {
  156. await this.importAccountWorksStatistics(account);
  157. } catch (e) {
  158. logger.error(
  159. `[WX WorkStats] Account failed. accountId=${account.id} name=${account.accountName || ''}`,
  160. e
  161. );
  162. }
  163. }
  164. logger.info('[WX WorkStats] All accounts done');
  165. }
  166. private async importAccountWorksStatistics(account: PlatformAccount): Promise<void> {
  167. const cookieHeader = getCookieHeaderString(account.cookieData);
  168. if (!cookieHeader) {
  169. logger.warn(`[WX WorkStats] accountId=${account.id} cookieData 为空或无法解析,跳过`);
  170. return;
  171. }
  172. const works = await this.workRepository.find({
  173. where: { accountId: account.id, platform: 'weixin_video' as any },
  174. });
  175. if (!works.length) {
  176. logger.info(`[WX WorkStats] accountId=${account.id} 没有作品,跳过`);
  177. return;
  178. }
  179. const { startTime, endTime, startDate, endDate } = getLast30DaysRange();
  180. const logFinderId = getLogFinderId(account.accountId);
  181. const xWechatUin = getXWechatUinFromCookie(cookieHeader);
  182. // _aid:post_list 时生成一次,本批次请求数据接口(feed_aggreagate)时复用;_rid 每次请求随机
  183. const sessionAid =
  184. process.env.WX_VIDEO_AID?.trim() || crypto.randomUUID();
  185. logger.info(`[WX WorkStats] accountId=${account.id} post_list 生成 aid=${sessionAid},数据接口复用此 aid,rid 每次随机`);
  186. const headers: Record<string, string> = {
  187. accept: '*/*',
  188. 'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
  189. 'content-type': 'application/json',
  190. cookie: cookieHeader,
  191. origin: 'https://channels.weixin.qq.com',
  192. referer: 'https://channels.weixin.qq.com/micro/statistic/post',
  193. 'user-agent':
  194. 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36 Edg/144.0.0.0',
  195. };
  196. if (xWechatUin) headers['x-wechat-uin'] = xWechatUin;
  197. const postListBody = {
  198. pageSize: 100,
  199. currentPage: 1,
  200. sort: 0,
  201. order: 0,
  202. startTime,
  203. endTime,
  204. timestamp: String(Date.now()),
  205. _log_finder_uin: '',
  206. _log_finder_id: logFinderId,
  207. rawKeyBuff: null,
  208. pluginSessionId: null,
  209. scene: 7,
  210. reqScene: 7,
  211. };
  212. const postListUrl = buildPostListUrl(sessionAid, generateRandomRid());
  213. let res: Response;
  214. try {
  215. res = await fetch(postListUrl, {
  216. method: 'POST',
  217. headers,
  218. body: JSON.stringify(postListBody),
  219. signal: AbortSignal.timeout(30_000),
  220. });
  221. } catch (e) {
  222. logger.error(`[WX WorkStats] post_list request failed. accountId=${account.id}`, e);
  223. throw e;
  224. }
  225. if (!res.ok) {
  226. logger.warn(`[WX WorkStats] post_list HTTP ${res.status}. accountId=${account.id}`);
  227. return;
  228. }
  229. const postListJson = (await res.json().catch(() => null)) as {
  230. errCode?: number;
  231. errMsg?: string;
  232. data?: { list?: PostListItem[]; totalCount?: number };
  233. } | null;
  234. if (!postListJson || postListJson.errCode !== 0) {
  235. logger.warn(
  236. `[WX WorkStats] post_list errCode=${postListJson?.errCode} errMsg=${postListJson?.errMsg}. accountId=${account.id}`
  237. );
  238. return;
  239. }
  240. const list = postListJson.data?.list ?? [];
  241. const totalCount = postListJson.data?.totalCount ?? list.length;
  242. const exportIdToObjectId = new Map<string, string>();
  243. for (const item of list) {
  244. const exportId = item.exportId ?? '';
  245. const objectId = item.objectId ?? '';
  246. if (exportId && objectId) exportIdToObjectId.set(exportId, objectId);
  247. }
  248. // 日志:对比 API 与 DB 的 exportId
  249. logger.info(
  250. `[WX WorkStats] accountId=${account.id} post_list 返回 totalCount=${totalCount} list.length=${list.length}`
  251. );
  252. const apiExportIds: string[] = [];
  253. for (let i = 0; i < list.length; i++) {
  254. const item = list[i];
  255. const eid = (item.exportId ?? '').trim();
  256. const oid = (item.objectId ?? '').trim();
  257. apiExportIds.push(eid);
  258. logger.info(`[WX WorkStats] post_list[${i}] exportId=${eid} objectId=${oid}`);
  259. }
  260. for (const work of works) {
  261. const dbExportId = (work.platformVideoId ?? '').trim();
  262. if (!dbExportId) continue;
  263. const matched = exportIdToObjectId.has(dbExportId);
  264. logger.info(
  265. `[WX WorkStats] DB workId=${work.id} platform_video_id(exportId)=${dbExportId} 匹配post_list=${matched}`
  266. );
  267. if (!matched && apiExportIds.length > 0) {
  268. const sameLength = apiExportIds.filter((e) => e.length === dbExportId.length).length;
  269. const containsDb = apiExportIds.some((e) => e === dbExportId || e.includes(dbExportId) || dbExportId.includes(e));
  270. logger.info(
  271. `[WX WorkStats] 对比: DB长度=${dbExportId.length} API条数=${apiExportIds.length} 同长API条数=${sameLength} 是否包含关系=${containsDb}`
  272. );
  273. }
  274. }
  275. let totalInserted = 0;
  276. let totalUpdated = 0;
  277. const feedHeaders: Record<string, string> = {
  278. ...headers,
  279. referer: 'https://channels.weixin.qq.com/micro/statistic/postDetail?isImageMode=0',
  280. 'finger-print-device-id':
  281. process.env.WX_VIDEO_FINGERPRINT_DEVICE_ID?.trim() ||
  282. '4605bc28ad3962eb9ee791897b199217',
  283. };
  284. for (const work of works) {
  285. const exportId = (work.platformVideoId ?? '').trim();
  286. if (!exportId) continue;
  287. const objectId = exportIdToObjectId.get(exportId);
  288. if (!objectId) {
  289. logger.debug(`[WX WorkStats] workId=${work.id} exportId=${exportId} 未在 post_list 中匹配到 objectId,跳过`);
  290. continue;
  291. }
  292. const feedBody = {
  293. startTs: String(startTime),
  294. endTs: String(endTime),
  295. interval: 3,
  296. feedId: objectId,
  297. timestamp: String(Date.now()),
  298. _log_finder_uin: '',
  299. _log_finder_id: logFinderId,
  300. rawKeyBuff: null,
  301. pluginSessionId: null,
  302. scene: 7,
  303. reqScene: 7,
  304. };
  305. const feedUrl = buildFeedAggregateUrl(sessionAid, generateRandomRid());
  306. let feedRes: Response;
  307. try {
  308. feedRes = await fetch(feedUrl, {
  309. method: 'POST',
  310. headers: feedHeaders,
  311. body: JSON.stringify(feedBody),
  312. signal: AbortSignal.timeout(30_000),
  313. });
  314. } catch (e) {
  315. logger.error(`[WX WorkStats] feed_aggreagate request failed. workId=${work.id} feedId=${objectId}`, e);
  316. continue;
  317. }
  318. if (!feedRes.ok) {
  319. logger.warn(`[WX WorkStats] feed_aggreagate HTTP ${feedRes.status}. workId=${work.id}`);
  320. continue;
  321. }
  322. const feedJson = (await feedRes.json().catch(() => null)) as {
  323. errCode?: number;
  324. data?: {
  325. dataByFanstype?: { dataByTabtype?: FeedAggregateDataByTabType[] }[];
  326. feedData?: { dataByTabtype?: FeedAggregateDataByTabType[] }[];
  327. };
  328. } | null;
  329. const isTestWork = work.id === 866 || work.id === 867 || work.id === 902 || work.id === 903;
  330. if (isTestWork) {
  331. logger.info(`[WX WorkStats] feed_aggreagate 原始响应 workId=${work.id} errCode=${feedJson?.errCode} errMsg=${(feedJson as any)?.errMsg} 完整body=${JSON.stringify(feedJson ?? null)}`);
  332. }
  333. if (!feedJson || feedJson.errCode !== 0) {
  334. if (isTestWork) {
  335. logger.warn(`[WX WorkStats] workId=${work.id} feed_aggreagate 非成功 errCode=${feedJson?.errCode} 跳过`);
  336. }
  337. continue;
  338. }
  339. const dataByFanstype = feedJson.data?.dataByFanstype ?? [];
  340. const firstFans = dataByFanstype[0];
  341. const dataByTabtype = firstFans?.dataByTabtype ?? feedJson.data?.feedData?.[0]?.dataByTabtype ?? [];
  342. const tabAll = dataByTabtype.find((t) => t.tabTypeName === '全部' || t.tabType === 999);
  343. if (isTestWork) {
  344. logger.info(
  345. `[WX WorkStats] workId=${work.id} dataByTabtype.length=${dataByTabtype.length} tabAll=${!!tabAll} tabAll.tabTypeName=${tabAll?.tabTypeName}`
  346. );
  347. }
  348. if (!tabAll?.data) continue;
  349. const data = tabAll.data;
  350. const browse = data.browse ?? [];
  351. const like = data.like ?? [];
  352. const comment = data.comment ?? [];
  353. if (isTestWork) {
  354. logger.info(
  355. `[WX WorkStats] workId=${work.id} 「全部」data: browse.length=${browse.length} like.length=${like.length} comment.length=${comment.length}`
  356. );
  357. logger.info(`[WX WorkStats] workId=${work.id} browse=${JSON.stringify(browse)}`);
  358. logger.info(`[WX WorkStats] workId=${work.id} like=${JSON.stringify(like)}`);
  359. logger.info(`[WX WorkStats] workId=${work.id} comment=${JSON.stringify(comment)}`);
  360. }
  361. const len = Math.max(browse.length, like.length, comment.length);
  362. if (len === 0) continue;
  363. const patches: Array<{
  364. workId: number;
  365. recordDate: Date;
  366. playCount?: number;
  367. likeCount?: number;
  368. commentCount?: number;
  369. }> = [];
  370. for (let i = 0; i < len; i++) {
  371. const recordDate = new Date(startDate);
  372. recordDate.setDate(recordDate.getDate() + i);
  373. recordDate.setHours(0, 0, 0, 0);
  374. if (recordDate > endDate) break;
  375. patches.push({
  376. workId: work.id,
  377. recordDate,
  378. playCount: toInt(browse[i], 0),
  379. likeCount: toInt(like[i], 0),
  380. commentCount: toInt(comment[i], 0),
  381. });
  382. }
  383. if (isTestWork) {
  384. logger.info(`[WX WorkStats] workId=${work.id} 生成 patches.length=${patches.length} 前3条=${JSON.stringify(patches.slice(0, 3))}`);
  385. }
  386. if (patches.length) {
  387. const result = await this.workDayStatisticsService.saveStatisticsForDateBatch(patches);
  388. if (isTestWork) {
  389. logger.info(`[WX WorkStats] workId=${work.id} saveStatisticsForDateBatch inserted=${result.inserted} updated=${result.updated}`);
  390. }
  391. totalInserted += result.inserted;
  392. totalUpdated += result.updated;
  393. }
  394. }
  395. logger.info(
  396. `[WX WorkStats] accountId=${account.id} completed. inserted=${totalInserted} updated=${totalUpdated}`
  397. );
  398. }
  399. }