// BaijiahaoWorkDailyStatisticsImportService.ts
  1. import fs from 'node:fs/promises';
  2. import path from 'node:path';
  3. import { chromium, type Browser, type BrowserContext, type Page } from 'playwright';
  4. import { AppDataSource, PlatformAccount, Work } from '../models/index.js';
  5. import { logger } from '../utils/logger.js';
  6. import { WorkDayStatisticsService } from './WorkDayStatisticsService.js';
  7. import type { ProxyConfig } from '@media-manager/shared';
  8. import { AccountService } from './AccountService.js';
  9. import { getPythonServiceBaseUrl } from './PythonServiceConfigService.js';
  /**
   * Cookie shape produced by `parseCookiesFromAccount` and handed to
   * Playwright's `context.addCookies`.
   *
   * A cookie is addressed either by `url` or by a `domain`/`path` pair.
   */
  type PlaywrightCookie = {
    name: string;
    value: string;
    domain?: string;
    path?: string;
    url?: string;
    expires?: number;
    httpOnly?: boolean;
    secure?: boolean;
    sameSite?: 'Lax' | 'None' | 'Strict';
  };
  /** Content categories passed as the `type` parameter of the article list statistics request. */
  type BjhListType = 'small_video_v2' | 'video' | 'news';
  /**
   * One row of the `articleListStatistic` list response.
   *
   * The importer in this file matches rows to works through `article_id` and
   * copies the `*_count` fields onto the work's `yesterday*` columns; the
   * remaining fields are declared but not read here.
   */
  type ArticleListStatisticItem = {
    article_id?: string;
    nid?: string;
    id?: string;
    title?: string;
    type?: string;
    view_count?: number;
    comment_count?: number;
    likes_count?: number;
    collect_count?: number;
    share_count?: number;
    /** Stored by the importer as the recommendation count. */
    rec_count?: number;
  };

  /** Envelope of the `articleListStatistic` response; the importer treats `errno === 0` as success. */
  type ArticleListStatisticResponse = {
    errno?: number;
    errmsg?: string;
    data?: {
      /** Total number of rows; may arrive as a string or a number. */
      count?: string | number;
      list?: ArticleListStatisticItem[];
    };
  };
  /**
   * One per-day entry of the `gettrenddata` response (`data.basic_list`).
   *
   * Numeric fields may arrive as strings or numbers; the importer normalises
   * them before saving. `disp_pv` is declared but not read in this file.
   */
  type TrendItem = {
    /** Day the entry refers to, formatted as YYYYMMDD. */
    event_day?: string;
    view_count?: string | number;
    disp_pv?: string | number;
    likes_count?: string | number;
    comment_count?: string | number;
    collect_count?: string | number;
    share_count?: string | number;
    /** Saved as the cover click rate. */
    cover_ctr?: string | number;
    /** Saved as the completion rate. */
    completion_ratio?: string | number;
    /** Saved as the average watch duration. */
    avg_duration?: string | number;
    /** Saved as the total watch duration. */
    view_duration?: string | number;
    /** Saved as the follower increase. */
    fans_add_cnt?: string | number;
  };

  /** Envelope of the `gettrenddata` response; the importer treats `errno === 0` as success. */
  type GetTrendDataResponse = {
    errno?: number;
    errmsg?: string;
    data?: {
      basic_list?: TrendItem[];
    };
  };
  /**
   * Create directory `p`, including any missing parents.
   *
   * @param p Directory path to create.
   * @returns The promise from `fs.mkdir` called with `{ recursive: true }`.
   */
  function ensureDir(p: string): Promise<string | undefined> {
    return fs.mkdir(p, { recursive: true });
  }
  /**
   * Parse an account's stored cookie blob into Playwright-style cookies.
   *
   * Two formats are accepted:
   *  1. JSON: either an array of cookie objects or an object with a `cookies`
   *     array. Entries without a name are dropped.
   *  2. A `Cookie` header style string: `"a=b; c=d"`.
   *
   * If the input looks like JSON but fails to parse, it is retried as a
   * header string.
   *
   * @param cookieData Raw cookie text from the account record; may be null.
   * @returns Parsed cookies, or an empty array when nothing usable is found.
   */
  function parseCookiesFromAccount(cookieData: string | null): PlaywrightCookie[] {
    const DEFAULT_URL = 'https://baijiahao.baidu.com';

    const raw = cookieData?.trim() ?? '';
    if (!raw) return [];

    // 1) JSON array, or an object wrapping a `cookies` array
    if (raw.startsWith('[') || raw.startsWith('{')) {
      try {
        const parsed = JSON.parse(raw);
        const arr = Array.isArray(parsed) ? parsed : (parsed?.cookies ? parsed.cookies : []);
        if (!Array.isArray(arr)) return [];

        const result: PlaywrightCookie[] = [];
        for (const c of arr) {
          const name = String(c?.name ?? '').trim();
          if (!name) continue;

          const cookie: PlaywrightCookie = { name, value: String(c?.value ?? '').trim() };
          if (c?.domain) {
            cookie.domain = String(c.domain);
            cookie.path = c?.path ? String(c.path) : '/';
          } else {
            // Playwright's addCookies accepts either `url` or a `domain`/`path`
            // pair and rejects a cookie carrying both `url` and `path`, so a
            // cookie without a domain is addressed by URL only.
            cookie.url = DEFAULT_URL;
          }

          if (typeof c?.expires === 'number') cookie.expires = c.expires;
          if (typeof c?.httpOnly === 'boolean') cookie.httpOnly = c.httpOnly;
          if (typeof c?.secure === 'boolean') cookie.secure = c.secure;

          const sameSite = c?.sameSite;
          if (sameSite === 'Lax' || sameSite === 'None' || sameSite === 'Strict') {
            cookie.sameSite = sameSite;
          }
          result.push(cookie);
        }
        return result;
      } catch {
        // Not valid JSON after all: fall through to the header-string parser.
      }
    }

    // 2) "a=b; c=d"
    const cookies: PlaywrightCookie[] = [];
    for (const segment of raw.split(';')) {
      const pair = segment.trim();
      const idx = pair.indexOf('=');
      if (idx <= 0) continue; // no '=' or empty name
      const name = pair.slice(0, idx).trim();
      if (!name) continue;
      // Only the first '=' splits name from value; values may contain '='.
      cookies.push({ name, value: pair.slice(idx + 1).trim(), url: DEFAULT_URL });
    }
    return cookies;
  }
  /**
   * Launch a headless Chromium instance, routed through the account's proxy
   * when one is configured and enabled.
   *
   * @param proxy Proxy settings of the account, or null for a direct connection.
   * @returns The launched browser and `shouldClose`, which is always `true`
   *          because every call launches a fresh browser.
   */
  async function createBrowserForAccount(
    proxy: ProxyConfig | null
  ): Promise<{ browser: Browser; shouldClose: boolean }> {
    const browser = await chromium.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--window-size=1920,1080',
      ],
      // Add the `proxy` key only when a proxy is enabled, so a direct launch
      // carries no proxy option at all.
      ...(proxy?.enabled
        ? {
            proxy: {
              server: `${proxy.type}://${proxy.host}:${proxy.port}`,
              username: proxy.username,
              password: proxy.password,
            },
          }
        : {}),
    });
    return { browser, shouldClose: true };
  }
  /**
   * Heuristic check for a JWT-shaped string.
   *
   * A value passes when, after trimming, it is at least 60 characters long and
   * consists of exactly three dot-separated segments, each longer than 10
   * characters and made only of base64url characters (`A-Z a-z 0-9 _ -`).
   * The content of the segments is not decoded or verified.
   *
   * @param v Candidate value of any type.
   * @returns `true` (narrowing `v` to string) when the value has that shape.
   */
  function isJwtLike(v: unknown): v is string {
    if (typeof v !== 'string') return false;
    const candidate = v.trim();
    if (candidate.length < 60) return false;
    const segments = candidate.split('.');
    if (segments.length !== 3) return false;
    return segments.every((seg) => seg.length > 10 && /^[A-Za-z0-9_-]+$/.test(seg));
  }
  /**
   * Try to find a JWT-shaped token in a loaded page.
   *
   * Lookup order:
   *  1. inside the page: localStorage, sessionStorage, a `<meta name="token">`
   *     or `<meta name="bjh-token">` tag, then the `token` field of a few
   *     well-known global state objects;
   *  2. fallback: the first JWT-shaped substring of the page HTML.
   *
   * Failures of either step are swallowed; the function never throws.
   *
   * @param page Playwright page that has already navigated.
   * @returns The token, or `''` when none is found.
   */
  async function extractTokenFromPage(page: Page): Promise<string> {
    const token = await page
      .evaluate(() => {
        // This callback is serialised and run inside the browser, so it cannot
        // reference `isJwtLike` from the Node side; the check is repeated here.
        const isJwtLikeInner = (v: any): boolean => {
          if (!v || typeof v !== 'string') return false;
          const s = v.trim();
          if (s.length < 60) return false;
          const parts = s.split('.');
          if (parts.length !== 3) return false;
          return parts.every((p) => /^[A-Za-z0-9_-]+$/.test(p) && p.length > 10);
        };

        // The storage is obtained through a getter so that an access error is
        // caught here and the remaining sources are still tried.
        const pickFromStorage = (getStorage: () => Storage): string => {
          try {
            const storage = getStorage();
            for (let i = 0; i < storage.length; i += 1) {
              const key = storage.key(i);
              const value = key === null ? null : storage.getItem(key);
              if (value !== null && isJwtLikeInner(value)) return value;
            }
          } catch {
            // ignore: storage unavailable
          }
          return '';
        };

        const fromLocal = pickFromStorage(() => window.localStorage);
        if (fromLocal) return fromLocal;
        const fromSession = pickFromStorage(() => window.sessionStorage);
        if (fromSession) return fromSession;

        const meta = document.querySelector('meta[name="token"], meta[name="bjh-token"]');
        const metaToken = meta && meta.getAttribute('content');
        if (metaToken && isJwtLikeInner(metaToken)) return metaToken;

        const w = window as any;
        const candidates = [
          (w.__INITIAL_STATE__ && w.__INITIAL_STATE__.token) || '',
          (w.__PRELOADED_STATE__ && w.__PRELOADED_STATE__.token) || '',
          (w.__NUXT__ && w.__NUXT__.state && w.__NUXT__.state.token) || '',
        ];
        for (const c of candidates) {
          if (isJwtLikeInner(c)) return c as string;
        }
        return '';
      })
      .catch(() => '');
    if (token && isJwtLike(token)) return token;

    // HTML fallback. Scan every dotted triple rather than only the first one:
    // an earlier non-token match (a dotted identifier or file name) would
    // otherwise hide a real token that appears later in the document.
    const html = await page.content().catch(() => '');
    const pattern = /[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}/g;
    for (const match of html.matchAll(pattern)) {
      if (isJwtLike(match[0])) return match[0];
    }
    return '';
  }
  /**
   * Format a date as a compact `YYYYMMDD` string.
   *
   * The year, month and day are read with the local-time getters, so the
   * result depends on the process time zone.
   *
   * @param date Date to format.
   * @returns Eight-character string, month and day zero-padded.
   */
  function toYmd(date: Date): string {
    const yyyy = date.getFullYear();
    const mm = String(date.getMonth() + 1).padStart(2, '0');
    const dd = String(date.getDate()).padStart(2, '0');
    return `${yyyy}${mm}${dd}`;
  }
  /**
   * Parse a compact `YYYYMMDD` string into a local-midnight `Date`.
   *
   * @param day Text expected to be exactly eight digits (surrounding
   *            whitespace is ignored).
   * @returns The date at 00:00 local time, or `null` when the text is not
   *          eight digits or does not name a real calendar day.
   */
  function parseYyyyMmDdCompactToDate(day: string): Date | null {
    const m = String(day || '').trim().match(/^(\d{4})(\d{2})(\d{2})$/);
    if (!m) return null;

    const year = Number(m[1]);
    const monthIndex = Number(m[2]) - 1;
    const dayOfMonth = Number(m[3]);
    const d = new Date(year, monthIndex, dayOfMonth);

    // `new Date` silently rolls impossible dates over (Feb 30 becomes Mar 1,
    // month 13 becomes the next January). Reject those instead of returning a
    // different day than the one requested.
    if (d.getFullYear() !== year || d.getMonth() !== monthIndex || d.getDate() !== dayOfMonth) {
      return null;
    }
    d.setHours(0, 0, 0, 0);
    return d;
  }
  /**
   * Coerce a loosely-typed value to an integer.
   *
   * Finite numbers are floored. Anything else is converted to a string,
   * stripped of thousands separators (`,`), parsed and floored.
   *
   * @param v Value of any type, typically a number or numeric string.
   * @returns The floored number, or `0` for null, undefined, empty or
   *          non-numeric input.
   */
  function toInt(v: unknown): number {
    if (v === null || v === undefined) return 0;
    if (typeof v === 'number' && Number.isFinite(v)) return Math.floor(v);

    const text = String(v).trim();
    if (!text) return 0;

    const parsed = Number(text.replace(/,/g, ''));
    return Number.isFinite(parsed) ? Math.floor(parsed) : 0;
  }
  /**
   * Convert a value to a trimmed string, substituting `'0'` for missing or
   * blank input. Not referenced elsewhere in the visible part of this file.
   *
   * @param v Value of any type.
   * @returns The trimmed string form, or `'0'` for null, undefined or blank.
   */
  function _toStr(v: unknown): string {
    if (v === null || v === undefined) return '0';
    const text = String(v).trim();
    return text || '0';
  }
  /**
   * Format a rate for storage: non-zero values get a `%` suffix, zero or
   * missing values become `'0'`.
   *
   * - Input that already contains `%` is returned unchanged (trimmed) when its
   *   numeric part is non-zero.
   * - A number in `(0, 1]` is treated as a fraction and scaled by 100 with two
   *   decimals (`0.5` becomes `'50.00%'`).
   * - Any other number is rounded to at most two decimals and suffixed with `%`.
   *
   * @param v Rate as a number or numeric string, with or without `%`.
   * @returns The formatted rate, or `'0'` for null, blank, zero or
   *          non-numeric input.
   */
  function formatRateWithPercent(v: unknown): string {
    if (v === null || v === undefined) return '0';
    const s = String(v).trim();
    if (!s) return '0';

    // Strip '%' as well as thousands separators before parsing: `Number('12.5%')`
    // is NaN, which would otherwise turn every already-formatted rate into '0'.
    const n = Number(s.replace(/[%,]/g, ''));
    if (!Number.isFinite(n) || n === 0) return '0';

    if (s.includes('%')) return s;
    if (n > 0 && n <= 1) return `${(n * 100).toFixed(2)}%`;
    return `${Number(n.toFixed(2))}%`;
  }
  /**
   * Format a watch duration with exactly two decimal places.
   *
   * Thousands separators (`,`) are removed before parsing.
   *
   * @param v Duration as a number or numeric string.
   * @returns The value with two decimals, or `'0'` for null, undefined or
   *          non-numeric input.
   */
  function formatDurationTwoDecimals(v: unknown): string {
    if (v === null || v === undefined) return '0';
    const parsed = Number(String(v).trim().replace(/,/g, ''));
    return Number.isFinite(parsed) ? parsed.toFixed(2) : '0';
  }
  /**
   * Imports per-work daily statistics for Baijiahao accounts.
   *
   * For every account the importer pages through the Python service's
   * `article_stats` endpoint for each content type, copies the list figures
   * onto the matching `Work` rows, then requests each work's trend data and
   * saves one statistics record per `event_day`.
   *
   * When the remote side reports that the session is not logged in, the account
   * is refreshed once through `AccountService` and the import is retried once.
   *
   * The Playwright-based helpers whose names start with `_` are not called by
   * the import flow in this file.
   */
  export class BaijiahaoWorkDailyStatisticsImportService {
    /** Offset of China Standard Time (UTC+8) in milliseconds. */
    private static readonly CHINA_UTC_OFFSET_MS = 8 * 60 * 60 * 1000;
    /** Length of the imported window in days, ending yesterday (China time). */
    private static readonly RANGE_DAYS = 30;
    /** Number of list rows requested per page. */
    private static readonly PAGE_SIZE = 10;
    /** Safety cap on the number of pages fetched per content type. */
    private static readonly MAX_PAGES = 200;
    /** Timeout for a single request to the Python service. */
    private static readonly PYTHON_TIMEOUT_MS = 30_000;

    private accountRepository = AppDataSource.getRepository(PlatformAccount);
    private workRepository = AppDataSource.getRepository(Work);
    private workDayStatisticsService = new WorkDayStatisticsService();
    private accountService = new AccountService();
    private stateDir = path.resolve(process.cwd(), 'tmp', 'baijiahao-storage-state');

    /** Run the import for every Baijiahao account. */
    static async runDailyImport(): Promise<void> {
      const svc = new BaijiahaoWorkDailyStatisticsImportService();
      await svc.runDailyImportForAllBaijiahaoAccounts();
    }

    /**
     * Run the import for a single account.
     *
     * @param accountId Id of a Baijiahao platform account.
     * @throws Error when no Baijiahao account with that id exists, or when the
     *         import itself fails.
     */
    static async runDailyImportForAccount(accountId: number): Promise<void> {
      const svc = new BaijiahaoWorkDailyStatisticsImportService();
      const account = await svc.accountRepository.findOne({
        where: { id: accountId, platform: 'baijiahao' as any },
      });
      if (!account) throw new Error(`未找到百家号账号 id=${accountId}`);
      await svc.importAccountWorkDaily(account);
    }

    /**
     * Import all Baijiahao accounts one after another. A failure of one account
     * is logged and does not stop the remaining accounts.
     */
    async runDailyImportForAllBaijiahaoAccounts(): Promise<void> {
      await ensureDir(this.stateDir);
      const accounts = await this.accountRepository.find({ where: { platform: 'baijiahao' as any } });
      logger.info(`[BJ WorkDaily] Start. total_accounts=${accounts.length}`);
      for (const account of accounts) {
        try {
          await this.importAccountWorkDaily(account);
        } catch (e) {
          logger.error(
            `[BJ WorkDaily] Account failed. accountId=${account.id} name=${account.accountName || ''}`,
            e
          );
        }
      }
      logger.info('[BJ WorkDaily] Done.');
    }

    /**
     * Calendar day "yesterday" in China (UTC+8), returned as a local-midnight
     * `Date` whose local year/month/day equal the China date.
     *
     * The instant is shifted by +8h and then read through the UTC getters, so
     * the result does not depend on the time zone of the server process.
     *
     * @param now Reference instant; defaults to the current time.
     */
    private static chinaCalendarYesterday(now: Date = new Date()): Date {
      const shifted = new Date(
        now.getTime() + BaijiahaoWorkDailyStatisticsImportService.CHINA_UTC_OFFSET_MS
      );
      return new Date(shifted.getUTCFullYear(), shifted.getUTCMonth(), shifted.getUTCDate() - 1);
    }

    /** Path of the persisted Playwright storage state for an account. */
    private getStatePath(accountId: number) {
      return path.join(this.stateDir, `${accountId}.json`);
    }

    /**
     * Launch a browser context for the account and try to read a token from the
     * statistics page.
     *
     * A previously saved storage state is reused when present; otherwise the
     * supplied cookies are added to the fresh context. When a token is found the
     * storage state is saved (best effort). On any failure the launched browser
     * is closed before the error is rethrown.
     *
     * @returns The open context and browser (the caller closes them), plus the
     *          token, which is `''` when none was found.
     */
    private async _createContext(
      account: PlatformAccount,
      cookies: PlaywrightCookie[]
    ): Promise<{ context: BrowserContext; browser: Browser; shouldClose: boolean; token: string }> {
      const { browser, shouldClose } = await createBrowserForAccount(account.proxyConfig);
      try {
        const statePath = this.getStatePath(account.id);
        let hasState = false;
        try {
          await fs.access(statePath);
          hasState = true;
        } catch {
          hasState = false;
        }

        const context = await browser.newContext({
          viewport: { width: 1920, height: 1080 },
          locale: 'zh-CN',
          timezoneId: 'Asia/Shanghai',
          userAgent:
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36 Edg/144.0.0.0',
          ...(hasState ? { storageState: statePath } : {}),
        });
        context.setDefaultTimeout(60_000);
        if (!hasState) {
          await context.addCookies(cookies as any);
        }

        const page = await context.newPage();
        await page.goto('https://baijiahao.baidu.com/builder/rc/analysiscontent/single', {
          waitUntil: 'domcontentloaded',
        });
        // Short fixed wait before reading the token from the page.
        await page.waitForTimeout(1500);
        const token = await extractTokenFromPage(page);
        if (token) {
          try {
            await ensureDir(this.stateDir);
            await context.storageState({ path: statePath });
          } catch (e) {
            // Persisting the state is best effort; the context is still usable.
            logger.warn(`[BJ WorkDaily] accountId=${account.id} failed to persist storage state`, e);
          }
        }
        await page.close().catch(() => undefined);
        return { context, browser, shouldClose, token };
      } catch (e) {
        // Do not leak the browser process when setup fails half-way.
        await browser.close().catch(() => undefined);
        throw e;
      }
    }

    /** Headers sent with direct Baijiahao requests; `token` is added only when non-empty. */
    private buildCommonHeaders(token: string): Record<string, string> {
      const headers: Record<string, string> = {
        accept: 'application/json, text/plain, */*',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        referer: 'https://baijiahao.baidu.com/builder/rc/analysiscontent/single',
      };
      if (token) headers.token = token;
      return headers;
    }

    /**
     * Fetch one page of `articleListStatistic` directly through the browser
     * context's request API.
     *
     * @throws Error when the response body is not JSON.
     */
    private async _fetchArticleListStatisticPage(
      context: BrowserContext,
      token: string,
      params: {
        startDay: string; // YYYYMMDD
        endDay: string; // YYYYMMDD
        type: BjhListType;
        num: number;
        count: number;
      }
    ): Promise<ArticleListStatisticResponse> {
      const query = new URLSearchParams({
        start_day: params.startDay,
        end_day: params.endDay,
        type: params.type,
        num: String(params.num),
        count: String(params.count),
      });
      const url = `https://baijiahao.baidu.com/author/eco/statistics/articleListStatistic?${query.toString()}`;
      const res = await context.request.get(url, {
        headers: this.buildCommonHeaders(token),
      });
      const json = (await res.json().catch(() => null)) as ArticleListStatisticResponse | null;
      if (!json) throw new Error(`articleListStatistic json parse failed (http=${res.status()})`);
      return json;
    }

    /**
     * Fetch `gettrenddata` for one work directly through the browser context's
     * request API.
     *
     * @throws Error when the response body is not JSON.
     */
    private async _fetchTrendData(
      context: BrowserContext,
      token: string,
      nid: string
    ): Promise<GetTrendDataResponse> {
      const url = `https://baijiahao.baidu.com/author/eco/statistic/gettrenddata?nid=${encodeURIComponent(
        nid
      )}&trend_type=all&data_type=addition`;
      const res = await context.request.get(url, {
        headers: this.buildCommonHeaders(token),
      });
      const json = (await res.json().catch(() => null)) as GetTrendDataResponse | null;
      if (!json) throw new Error(`gettrenddata json parse failed (http=${res.status()})`);
      return json;
    }

    /** Whether an `errno` value means "not logged in". */
    private isNotLoggedInErrno(errno: unknown): boolean {
      const n = typeof errno === 'number' ? errno : Number(errno);
      // 110: not logged in; 20040001: current user not logged in (errno seen in a sample response)
      return n === 110 || n === 20040001;
    }

    /** Whether a thrown value represents an expired or missing login. */
    private isNotLoggedInError(e: unknown): boolean {
      const err = e as any;
      if (!err) return false;
      if (err.code === 'BJH_NOT_LOGGED_IN') return true;
      const msg = String(err.message || '').toLowerCase();
      return msg.includes('未登录') || msg.includes('not logged in');
    }

    /**
     * POST a JSON body (the account cookie plus `extraBody`) to a Baijiahao
     * endpoint of the Python service and normalise the response envelope.
     *
     * @param endpoint Endpoint name under `/baijiahao/`; also used in error messages.
     * @param account Account whose `cookieData` is forwarded.
     * @param extraBody Additional body fields, sent after `cookie`.
     * @returns `errno` (0 when absent), optional `errmsg`, and the raw `data`.
     * @throws Error when the cookie is empty, the response is not JSON, or the
     *         HTTP status is not ok. The request is aborted after the timeout.
     */
    private async postToPythonService(
      endpoint: 'article_stats' | 'trend_data',
      account: PlatformAccount,
      extraBody: Record<string, unknown>
    ): Promise<{ errno: number; errmsg: string | undefined; data: unknown }> {
      const base = (await getPythonServiceBaseUrl()).replace(/\/$/, '');
      const url = `${base}/baijiahao/${endpoint}`;
      const cookie = String(account.cookieData || '').trim();
      if (!cookie) {
        throw new Error(`百家号账号 cookie 为空,无法调用 Python ${endpoint}`);
      }

      const controller = new AbortController();
      const timeoutId = setTimeout(
        () => controller.abort(),
        BaijiahaoWorkDailyStatisticsImportService.PYTHON_TIMEOUT_MS
      );
      try {
        const res = await fetch(url, {
          method: 'POST',
          signal: controller.signal,
          headers: {
            'Content-Type': 'application/json',
          },
          body: JSON.stringify({ cookie, ...extraBody }),
        });
        const text = await res.text();
        let data: any = {};
        try {
          data = text ? JSON.parse(text) : {};
        } catch {
          throw new Error(`Python ${endpoint} 返回非 JSON 响应: http=${res.status}`);
        }
        if (!res.ok) {
          const msg = String(data?.errmsg || data?.error || '').trim() || `HTTP ${res.status}`;
          throw new Error(`Python ${endpoint} 调用失败: ${msg}`);
        }
        return {
          errno: typeof data?.errno === 'number' ? data.errno : Number(data?.errno ?? 0),
          errmsg: String(data?.errmsg || data?.error || '').trim() || undefined,
          data: data?.data,
        };
      } finally {
        // The timer also covers reading the body, so clear it only here.
        clearTimeout(timeoutId);
      }
    }

    /**
     * Call the Baijiahao `articleListStatistic` API through the Python service,
     * which handles the cookie on its side.
     */
    private async fetchArticleListStatisticViaPython(
      account: PlatformAccount,
      params: {
        startDay: string; // YYYYMMDD
        endDay: string; // YYYYMMDD
        type: BjhListType;
        num: number;
        count: number;
      }
    ): Promise<ArticleListStatisticResponse> {
      const raw = await this.postToPythonService('article_stats', account, {
        start_day: params.startDay,
        end_day: params.endDay,
        type: params.type,
        num: params.num,
        count: params.count,
      });
      return {
        errno: raw.errno,
        errmsg: raw.errmsg,
        data: raw.data as ArticleListStatisticResponse['data'],
      };
    }

    /**
     * Call the Baijiahao `gettrenddata` API through the Python service, which
     * handles the cookie on its side.
     */
    private async fetchTrendDataViaPython(
      account: PlatformAccount,
      nid: string
    ): Promise<GetTrendDataResponse> {
      const raw = await this.postToPythonService('trend_data', account, { nid });
      return {
        errno: raw.errno,
        errmsg: raw.errmsg,
        data: raw.data as GetTrendDataResponse['data'],
      };
    }

    /** Build the error thrown when an endpoint reports a not-logged-in errno. */
    private notLoggedInError(api: string, errno: unknown): Error {
      const err = new Error(`${api} errno=${errno} 未登录/会话失效`);
      (err as any).code = 'BJH_NOT_LOGGED_IN';
      return err;
    }

    /**
     * Import one account.
     *
     * Skips accounts without parseable cookies or without Baijiahao works. When
     * the first attempt fails because the login has expired, the account is
     * refreshed and the import retried once.
     *
     * @param account Account to import.
     * @param isRetry `true` for the attempt made after a refresh; such an
     *                attempt never triggers another refresh.
     * @throws Any import error other than an expired login on the first attempt.
     */
    private async importAccountWorkDaily(account: PlatformAccount, isRetry = false): Promise<void> {
      const cookies = parseCookiesFromAccount(account.cookieData);
      if (!cookies.length) {
        logger.warn(
          `[BJ WorkDaily] accountId=${account.id} cookieData 为空或无法解析,跳过`
        );
        return;
      }

      const works = await this.workRepository.find({
        where: { accountId: account.id, platform: 'baijiahao' as any },
        select: ['id', 'platformVideoId'],
      });
      if (!works.length) {
        logger.info(
          `[BJ WorkDaily] accountId=${account.id} 没有 baijiahao 作品,跳过`
        );
        return;
      }

      // platformVideoId -> local work id
      const idMap = new Map<string, number>();
      for (const w of works) {
        const k = String(w.platformVideoId || '').trim();
        if (k) idMap.set(k, w.id);
      }

      try {
        await this.importWorksInRange(account, idMap);
      } catch (e) {
        if (isRetry || !this.isNotLoggedInError(e)) throw e;
        await this.recoverFromExpiredLogin(account);
      }
    }

    /**
     * Page through the list statistics for every content type over the last
     * `RANGE_DAYS` days (ending yesterday, China time) and import each page.
     */
    private async importWorksInRange(
      account: PlatformAccount,
      idMap: Map<string, number>
    ): Promise<void> {
      const { RANGE_DAYS, PAGE_SIZE, MAX_PAGES } = BaijiahaoWorkDailyStatisticsImportService;

      // Default window: yesterday (China time) as end_day, 29 days earlier as start_day.
      const chinaYesterday = BaijiahaoWorkDailyStatisticsImportService.chinaCalendarYesterday();
      const endDay = toYmd(chinaYesterday);
      const startDayDate = new Date(chinaYesterday);
      startDayDate.setDate(startDayDate.getDate() - (RANGE_DAYS - 1));
      const startDay = toYmd(startDayDate);

      const types: BjhListType[] = ['small_video_v2', 'video', 'news'];
      let worksUpdated = 0;
      let wdsInserted = 0;
      let wdsUpdated = 0;

      for (const t of types) {
        for (let num = 1; num <= MAX_PAGES; num += 1) {
          const body = await this.fetchArticleListStatisticViaPython(account, {
            startDay,
            endDay,
            type: t,
            num,
            count: PAGE_SIZE,
          });
          if (this.isNotLoggedInErrno(body.errno)) {
            throw this.notLoggedInError('articleListStatistic', body.errno);
          }
          if (body.errno !== 0) {
            throw new Error(
              `articleListStatistic errno=${body.errno} errmsg=${body.errmsg || ''}`
            );
          }

          const list = body.data?.list || [];
          const total = toInt(body.data?.count);
          if (!list.length) break;

          // 1) Write the list figures onto works.yesterday_* first.
          worksUpdated += await this.applyListTotals(list, idMap);

          // 2) Then fetch the trend per work and save basic_list day by day.
          const saved = await this.importTrends(account, list, idMap);
          wdsInserted += saved.inserted;
          wdsUpdated += saved.updated;

          // Stop once the reported total is covered; when no total is reported
          // keep going until an empty page or the page cap.
          if (total > 0 && num * PAGE_SIZE >= total) break;
        }
      }

      logger.info(
        `[BJ WorkDaily] accountId=${account.id} done. worksUpdated=${worksUpdated} wdsInserted=${wdsInserted} wdsUpdated=${wdsUpdated} range=${startDay}-${endDay}`
      );
    }

    /**
     * Copy the list figures of one page onto the matching works.
     *
     * NOTE(review): the list is requested for the whole start_day..end_day
     * window, yet the figures are stored in the `yesterday*` columns; confirm
     * that this is the intended meaning of those columns.
     *
     * @returns Number of affected work rows.
     */
    private async applyListTotals(
      list: NonNullable<NonNullable<ArticleListStatisticResponse['data']>['list']>,
      idMap: Map<string, number>
    ): Promise<number> {
      let affected = 0;
      for (const it of list) {
        const articleId = String(it.article_id || '').trim();
        if (!articleId) continue;
        const workId = idMap.get(articleId);
        if (!workId) continue;

        const patch: Partial<Work> = {
          yesterdayPlayCount: toInt(it.view_count),
          yesterdayCommentCount: toInt(it.comment_count),
          yesterdayLikeCount: toInt(it.likes_count),
          yesterdayCollectCount: toInt(it.collect_count),
          yesterdayShareCount: toInt(it.share_count),
          // Baijiahao list rec_count -> recommendation count
          yesterdayRecommendCount: toInt(it.rec_count),
        };
        const r = await this.workRepository.update(workId, patch as any);
        if (r.affected && r.affected > 0) affected += r.affected;
      }
      return affected;
    }

    /**
     * Fetch the trend data for every known work on one list page and save one
     * statistics record per valid `event_day`.
     *
     * A non-zero errno for a single work is logged and skipped; a not-logged-in
     * errno aborts the import with a `BJH_NOT_LOGGED_IN` error.
     *
     * @returns Totals of inserted and updated statistics records.
     */
    private async importTrends(
      account: PlatformAccount,
      list: NonNullable<NonNullable<ArticleListStatisticResponse['data']>['list']>,
      idMap: Map<string, number>
    ): Promise<{ inserted: number; updated: number }> {
      let inserted = 0;
      let updated = 0;
      for (const it of list) {
        const articleId = String(it.article_id || '').trim();
        if (!articleId) continue;
        const workId = idMap.get(articleId);
        if (!workId) continue;

        const trend = await this.fetchTrendDataViaPython(account, articleId);
        if (this.isNotLoggedInErrno(trend.errno)) {
          throw this.notLoggedInError('gettrenddata', trend.errno);
        }
        if (trend.errno !== 0) {
          logger.warn(
            `[BJ WorkDaily] gettrenddata errno=${trend.errno} nid=${articleId} errmsg=${trend.errmsg || ''}`
          );
          continue;
        }

        for (const day of trend.data?.basic_list || []) {
          const d = parseYyyyMmDdCompactToDate(String(day.event_day || ''));
          if (!d) continue;
          const save = await this.workDayStatisticsService.saveStatisticsForDate(workId, d, {
            playCount: toInt(day.view_count),
            likeCount: toInt(day.likes_count),
            commentCount: toInt(day.comment_count),
            collectCount: toInt(day.collect_count),
            shareCount: toInt(day.share_count),
            // basic_list currently has no recommendation field; map it to
            // recommendCount if one is added later.
            fansIncrease: toInt(day.fans_add_cnt),
            coverClickRate: formatRateWithPercent(day.cover_ctr),
            completionRate: formatRateWithPercent(day.completion_ratio),
            avgWatchDuration: formatDurationTwoDecimals(day.avg_duration),
            totalWatchDuration: formatDurationTwoDecimals(day.view_duration),
          });
          inserted += save.inserted;
          updated += save.updated;
        }
      }
      return { inserted, updated };
    }

    /** Mark an account as expired. */
    private async markAccountExpired(accountId: number): Promise<void> {
      await this.accountRepository.update(accountId, {
        status: 'expired' as any,
      });
    }

    /**
     * Handle an expired login: refresh the account and retry the import once.
     *
     * The account is marked expired when the refresh fails, when the refresh
     * reports that a re-login is needed, or when the retry is still not logged
     * in. Any other error from the retry is rethrown: it is an import failure,
     * not a login problem, and must not expire the account.
     */
    private async recoverFromExpiredLogin(account: PlatformAccount): Promise<void> {
      logger.info(
        `[BJ WorkDaily] Login expired detected for account ${account.id}, attempting to refresh account...`
      );

      let refreshed: PlatformAccount;
      try {
        const refreshResult = await this.accountService.refreshAccount(
          account.userId,
          account.id
        );
        if (refreshResult.needReLogin) {
          logger.warn(
            `[BJ WorkDaily] Account ${account.id} refresh finished but still need re-login, mark as expired.`
          );
          await this.markAccountExpired(account.id);
          return;
        }
        const found = await this.accountRepository.findOne({
          where: { id: account.id },
        });
        if (!found) {
          throw new Error('账号刷新后未找到');
        }
        refreshed = found;
      } catch (refreshError) {
        logger.error(
          `[BJ WorkDaily] Account ${account.id} refresh failed:`,
          refreshError
        );
        await this.markAccountExpired(account.id);
        return;
      }

      logger.info(
        `[BJ WorkDaily] Account ${account.id} refresh success, retry work daily import once...`
      );
      try {
        await this.importAccountWorkDaily(refreshed, true);
      } catch (retryError) {
        if (!this.isNotLoggedInError(retryError)) throw retryError;
        logger.warn(
          `[BJ WorkDaily] Account ${account.id} still not logged in after refresh, mark as expired.`
        );
        await this.markAccountExpired(account.id);
      }
    }
  }