Przeglądaj źródła

抖音用户数据同步更新

Ethanfly 14 godzin temu
rodzic
commit
32774531c7
1 zmienionych plików z 121 dodań i 177 usunięć
  1. 121 177
      server/src/services/DouyinAccountOverviewImportService.ts

+ 121 - 177
server/src/services/DouyinAccountOverviewImportService.ts

@@ -1,7 +1,6 @@
 import fs from 'node:fs/promises';
 import path from 'node:path';
 import { chromium, type Browser } from 'playwright';
-import * as XLSXNS from 'xlsx';
 import { AppDataSource, PlatformAccount } from '../models/index.js';
 import { BrowserManager } from '../automation/browser.js';
 import { logger } from '../utils/logger.js';
@@ -11,10 +10,6 @@ import type { ProxyConfig } from '@media-manager/shared';
 import { WS_EVENTS } from '@media-manager/shared';
 import { wsManager } from '../websocket/index.js';
 
-// xlsx 在 ESM 下可能挂在 default 上;这里做一次兼容兜底
-// eslint-disable-next-line @typescript-eslint/no-explicit-any
-const XLSX: any = (XLSXNS as any).default ?? (XLSXNS as any);
-
 type PlaywrightCookie = {
   name: string;
   value: string;
@@ -67,20 +62,14 @@ function normalizeDateText(input: unknown): Date | null {
   return null;
 }
 
-function parseChineseNumberLike(input: unknown): number | null {
-  if (input === null || input === undefined) return null;
-  const s = String(input).trim();
-  if (!s) return null;
-  // 8,077
-  const plain = s.replace(/,/g, '');
-  // 4.8万
-  const wan = plain.match(/^(\d+(\.\d+)?)\s*万$/);
-  if (wan) return Math.round(Number(wan[1]) * 10000);
-  const yi = plain.match(/^(\d+(\.\d+)?)\s*亿$/);
-  if (yi) return Math.round(Number(yi[1]) * 100000000);
-  const n = Number(plain.replace(/[^\d.-]/g, ''));
-  if (Number.isFinite(n)) return Math.round(n);
-  return null;
+// Formats a fractional rate (e.g. 0.1234) as a percent string with at most two decimals ("12.34%").
+// Returns undefined for input that is not a finite number, and the bare string '0' (no percent sign) for zero.
+function toRatePercentStringFromValue(val: unknown): string | undefined {
+  const ratio = typeof val === 'number' ? val : Number(val);
+  if (!Number.isFinite(ratio)) return undefined;
+  if (ratio === 0) return '0';
+  const pct = Math.round(ratio * 100 * 100) / 100;
+  return `${pct.toFixed(2).replace(/\.00$/, '').replace(/(\.\d)0$/, '$1')}%`;
 }
 
 function parseCookiesFromAccount(cookieData: string | null): PlaywrightCookie[] {
@@ -162,92 +151,56 @@ async function createBrowserForAccount(proxy: ProxyConfig | null): Promise<{ bro
   return { browser, shouldClose: false };
 }
 
-function parseDouyinExcel(
-  filePath: string
-): Map<string, { recordDate: Date } & Record<string, any>> {
-  const wb = XLSX.readFile(filePath);
-  const result = new Map<string, { recordDate: Date } & Record<string, any>>();
-
-  logger.info(
-    `[DY Import] Excel loaded. file=${path.basename(filePath)} sheets=${wb.SheetNames.join(' | ')}`
-  );
-
-  for (const sheetName of wb.SheetNames) {
-    const sheet = wb.Sheets[sheetName];
-    const rows = XLSX.utils.sheet_to_json<Record<string, any>>(sheet, { defval: '' });
-
-    if (!rows.length) {
-      logger.warn(`[DY Import] Sheet empty. name=${sheetName}`);
-      continue;
-    }
-
-    const keys = Object.keys(rows[0] || {});
-    logger.info(`[DY Import] Sheet parsed. name=${sheetName} rows=${rows.length} keys=${keys.join(',')}`);
-
-    const normalizeKey = (k: string) => k.replace(/^\uFEFF/, '').trim();
-
-    for (const row of rows) {
-      const rawKeys = Object.keys(row || {});
-      if (!rawKeys.length) continue;
-      const keysNormalized = rawKeys.map((k) => ({ raw: k, norm: normalizeKey(k) }));
-
-      // 兼容 Excel 表头带 BOM/空格:优先找包含“日期”的列作为日期列
-      const dateKey =
-        keysNormalized.find((k) => k.norm === '日期')?.raw ??
-        keysNormalized.find((k) => k.norm.includes('日期'))?.raw ??
-        keysNormalized.find((k) => k.norm.toLowerCase() === 'date')?.raw ??
-        keysNormalized[0]!.raw;
-
-      const dateVal = (row as any)[dateKey];
-
-      const d = normalizeDateText(dateVal);
-      if (!d) continue;
-
-      const key = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, '0')}-${String(d.getDate()).padStart(2, '0')}`;
-      if (!result.has(key)) result.set(key, { recordDate: d });
-      const obj = result.get(key)!;
-
-      // 抖音导出的 Excel 通常是两列:日期 + 指标名(如“作品分享/净增粉丝/作品点赞/播放量...”)
-      // 因此优先按“第二列标题”做自动映射,避免漏掉“沈凉音”这种全量导出格式。
-      const metricKeyRaw = keysNormalized.find((k) => k.raw !== dateKey)?.raw;
-      if (!metricKeyRaw) continue;
-      const metricKey = normalizeKey(metricKeyRaw);
-
-      // 显式排除:主页访问 / 取关粉丝
-      if (metricKey.includes('主页访问') || metricKey.includes('取关粉丝')) continue;
+type DashboardMetricTrendPoint = { // one entry of DashboardMetricItem.trends
+  date_time?: string; // YYYYMMDD
+  value?: number; // pickTrendValue prefers this field when it is a number
+  douyin_value?: number; // pickTrendValue falls back to this when `value` is not a number
+  xigua_value?: number;
+  yumme_value?: number;
+  change_rate?: number;
+};
 
-      const rawVal = (row as any)[metricKeyRaw];
-      if (rawVal === undefined || rawVal === null) continue;
+type DashboardMetricItem = { // one entry of DashboardResponse.metrics
+  english_metric_name?: string; // key the importer matches on, e.g. 'play_cnt', 'digg_cnt', 'total_fans_cnt'
+  metric_name?: string;
+  metric_value?: number;
+  trends?: DashboardMetricTrendPoint[]; // per-date points; each date_time is parsed as YYYYMMDD
+};
 
-      // 1)封面点击率:字符串百分比直接存
-      if (metricKey.includes('封面点击率')) {
-        const s = String(rawVal).trim();
-        if (s) (obj as any).coverClickRate = s;
-        continue;
-      }
+type DashboardResponse = { // JSON body of the overview/dashboard request
+  status_code?: number; // the importer treats any value other than 0 as a failure
+  status_msg?: string; // included in the error thrown on failure
+  metrics?: DashboardMetricItem[];
+};
 
-      // 2)其余按数值解析
-      const n = parseChineseNumberLike(rawVal);
-      if (typeof n !== 'number') continue;
-
-      if (metricKey.includes('播放')) (obj as any).playCount = n;
-      else if (metricKey.includes('点赞')) (obj as any).likeCount = n;
-      else if (metricKey.includes('评论')) (obj as any).commentCount = n;
-      else if (metricKey.includes('分享')) (obj as any).shareCount = n;
-      else if (metricKey.includes('净增粉丝') || metricKey.includes('新增粉丝')) (obj as any).fansIncrease = n;
-      // 总粉丝数/总粉丝量:入库 fans_count
-      else if (metricKey.includes('总粉丝')) (obj as any).fansCount = n;
-    }
-  }
+// Parses a compact YYYYMMDD value into a Date at local midnight; returns null for malformed or impossible dates.
+function parseYmdCompactToDate(ymd: unknown): Date | null {
+  const s = String(ymd || '').trim();
+  if (!/^\d{8}$/.test(s)) return null;
+  const yyyy = Number(s.slice(0, 4));
+  const mm = Number(s.slice(4, 6));
+  const dd = Number(s.slice(6, 8));
+  const d = new Date(yyyy, mm - 1, dd); // omitted time parts default to 0, i.e. local midnight
+  // Date silently rolls over out-of-range parts (20240231 -> Mar 2, month 13 -> next year, year < 100 -> 19xx); reject those.
+  return d.getFullYear() === yyyy && d.getMonth() === mm - 1 && d.getDate() === dd ? d : null;
+}
 
-  return result;
+// Picks the numeric value of one trend point.
+// Prefers the aggregated `value`; falls back to `douyin_value` only when `value` is not a number.
+// Yields undefined when neither field is a number or the chosen number is not finite.
+function pickTrendValue(pt: DashboardMetricTrendPoint): number | undefined {
+  let picked: number | undefined;
+  if (typeof pt?.value === 'number') picked = pt.value;
+  else if (typeof pt?.douyin_value === 'number') picked = pt.douyin_value;
+  // A non-finite `value` (NaN/Infinity) is rejected here without trying `douyin_value`.
+  if (picked === undefined || !Number.isFinite(picked)) return undefined;
+  return picked;
 }
 
 export class DouyinAccountOverviewImportService {
   private accountRepository = AppDataSource.getRepository(PlatformAccount);
   private userDayStatisticsService = new UserDayStatisticsService();
 
-  private downloadDir = path.resolve(process.cwd(), 'tmp', 'douyin-account-overview');
   private stateDir = path.resolve(process.cwd(), 'tmp', 'douyin-storage-state');
 
   private getStatePath(accountId: number) {
@@ -316,8 +269,6 @@ export class DouyinAccountOverviewImportService {
    * 为所有抖音账号导出“账号总览-短视频-数据表现-近30天”并导入 user_day_statistics
    */
   async runDailyImportForAllDouyinAccounts(): Promise<void> {
-    await ensureDir(this.downloadDir);
-
     const accounts = await this.accountRepository.find({
       where: { platform: 'douyin' as any },
     });
@@ -463,94 +414,87 @@ export class DouyinAccountOverviewImportService {
       }
       await page.waitForTimeout(1200);
 
-      // 逐个指标导出(排除:主页访问 / 取关粉丝)
-      // 说明:抖音导出通常是“日期 + 指标”两列,每次只能导出当前选中的指标
-      // 注意:抖音 UI 上“总粉丝”文案可能是「总粉丝量」而不是「总粉丝数」
-      const metricsToExport: Array<{ name: string; candidates: string[] }> = [
-        { name: '播放量', candidates: ['播放量'] },
-        { name: '作品点赞', candidates: ['作品点赞', '点赞'] },
-        { name: '作品评论', candidates: ['作品评论', '评论'] },
-        { name: '作品分享', candidates: ['作品分享', '分享'] },
-        { name: '封面点击率', candidates: ['封面点击率'] },
-        { name: '净增粉丝', candidates: ['净增粉丝', '新增粉丝'] },
-        { name: '总粉丝量', candidates: ['总粉丝量', '总粉丝数', '粉丝总量'] },
-      ];
-
       let totalInserted = 0;
       let totalUpdated = 0;
-      let mergedDays = new Map<string, { recordDate: Date } & Record<string, any>>();
-      const savedExcelPaths: string[] = [];
-
-      const clickMetric = async (metric: { name: string; candidates: string[] }) => {
-        // 先精确匹配,失败后用包含匹配(适配 UI 文案差异)
-        for (const c of metric.candidates) {
-          const locatorExact = page.getByText(c, { exact: true }).first();
-          const exactCount = await locatorExact.count().catch(() => 0);
-          if (exactCount > 0) {
-            await locatorExact.click().catch(() => undefined);
-            await page.waitForTimeout(800);
-            return c;
-          }
-        }
-        for (const c of metric.candidates) {
-          const locatorFuzzy = page.getByText(c, { exact: false }).first();
-          const fuzzyCount = await locatorFuzzy.count().catch(() => 0);
-          if (fuzzyCount > 0) {
-            await locatorFuzzy.click().catch(() => undefined);
-            await page.waitForTimeout(800);
-            return c;
-          }
-        }
-        logger.warn(`[DY Import] metric not found on page. accountId=${account.id} metric=${metric.name}`);
-        return null;
-      };
+      const apiUrl = 'https://creator.douyin.com/janus/douyin/creator/data/overview/dashboard';
 
-      for (const metric of metricsToExport) {
-        logger.info(`[DY Import] accountId=${account.id} exporting metric: ${metric.name}...`);
-        await clickMetric(metric);
-
-        const [download] = await Promise.all([
-          page.waitForEvent('download', { timeout: 60_000 }),
-          page.getByText('导出数据', { exact: true }).first().click(),
-        ]);
-
-        const filename = `${account.id}_${Date.now()}_${download.suggestedFilename()}`;
-        const filePath = path.join(this.downloadDir, filename);
-        await download.saveAs(filePath);
-        // 保留 Excel 不删除,便于核对数据;路径打日志方便查看
-        const absolutePath = path.resolve(filePath);
-        savedExcelPaths.push(absolutePath);
-        logger.info(
-          `[DY Import] Excel saved (${metric.name}): ${absolutePath}`
-        );
+      logger.info(`[DY Import] accountId=${account.id} fetch dashboard (POST recent_days=30)...`);
+
+      // 优先监听页面自身是否会发起该请求;若没有,则在页面上下文里手动 fetch(浏览器自动带 cookie)
+      const responsePromise = page
+        .waitForResponse(
+          (res) => res.request().method() === 'POST' && res.url().includes('/janus/douyin/creator/data/overview/dashboard'),
+          { timeout: 8000 }
+        )
+        .catch(() => null);
 
+      const evalPromise = page.evaluate(async (url) => {
         try {
-          const perDay = parseDouyinExcel(filePath);
-          // 合并不同指标到同一日期 patch(与小红书维度一致)
-          for (const [k, v] of perDay.entries()) {
-            if (!mergedDays.has(k)) mergedDays.set(k, { recordDate: v.recordDate });
-            const base = mergedDays.get(k)!;
-            Object.assign(base, v);
-          }
-          logger.info(
-            `[DY Import] metric exported & parsed. accountId=${account.id} metric=${metric.name} file=${path.basename(filePath)} days=${perDay.size}`
-          );
-        } finally {
-          // 默认导入后删除 Excel,避免磁盘堆积;仅在显式 KEEP_DY_XLSX=true 时保留(用于调试)
-          if (process.env.KEEP_DY_XLSX === 'true') {
-            logger.warn(`[DY Import] KEEP_DY_XLSX=true, keep file: ${filePath}`);
-          } else {
-            await fs.unlink(filePath).catch(() => undefined);
-          }
+          const r = await fetch(url, {
+            method: 'POST',
+            credentials: 'include',
+            headers: {
+              accept: 'application/json, text/plain, */*',
+              'content-type': 'application/json',
+            },
+            body: JSON.stringify({ recent_days: 30 }),
+          });
+          const json = await r.json().catch(() => null);
+          return json;
+        } catch (e: any) {
+          return { status_code: -1, status_msg: String(e?.message || e) };
         }
+      }, apiUrl);
+
+      const [res, evalJson] = await Promise.all([responsePromise, evalPromise]);
+      const body = ((res ? await res.json().catch(() => null) : null) ?? evalJson ?? null) as DashboardResponse | null;
+
+      if (!body || typeof body !== 'object') {
+        throw new Error('overview/dashboard 响应不是 JSON');
+      }
+      if (Number(body.status_code) !== 0) {
+        throw new Error(`overview/dashboard 返回非成功: code=${body.status_code} msg=${body.status_msg || ''}`);
       }
 
-      // 汇总:本账号导出的 7 个 Excel 已解析
-      logger.info(
-        `[DY Import] accountId=${account.id} 共 ${savedExcelPaths.length} 个 Excel 已解析`
-      );
-      if (savedExcelPaths.length !== 7) {
-        logger.warn(`[DY Import] accountId=${account.id} 预期 7 个 Excel,实际 ${savedExcelPaths.length} 个`);
+      const metrics = Array.isArray(body.metrics) ? body.metrics : [];
+      if (!metrics.length) {
+        logger.warn(`[DY Import] dashboard metrics empty. accountId=${account.id}`);
+      }
+
+      const mergedDays = new Map<string, { recordDate: Date } & Record<string, any>>();
+
+      const setDay = (d: Date) => {
+        const key = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, '0')}-${String(d.getDate()).padStart(2, '0')}`;
+        if (!mergedDays.has(key)) mergedDays.set(key, { recordDate: d });
+        return mergedDays.get(key)!;
+      };
+
+      for (const m of metrics) {
+        const en = String(m?.english_metric_name || '').trim();
+        const trends = Array.isArray(m?.trends) ? m.trends : [];
+        if (!en || !trends.length) continue;
+
+        for (const pt of trends) {
+          const d = parseYmdCompactToDate(pt?.date_time);
+          if (!d) continue;
+          const obj = setDay(d);
+          const v = pickTrendValue(pt);
+          if (v === undefined) continue;
+
+          // 显式排除:主页访问 / 取关粉丝(库里没有对应字段)
+          if (en === 'homepage_view_cnt' || en === 'cancel_fans_cnt') continue;
+
+          if (en === 'total_fans_cnt') (obj as any).fansCount = Math.round(v);
+          else if (en === 'play_cnt') (obj as any).playCount = Math.round(v);
+          else if (en === 'digg_cnt') (obj as any).likeCount = Math.round(v);
+          else if (en === 'comment_cnt') (obj as any).commentCount = Math.round(v);
+          else if (en === 'share_count') (obj as any).shareCount = Math.round(v);
+          else if (en === 'net_fans_cnt') (obj as any).fansIncrease = Math.round(v);
+          else if (en === 'cover_click_ratio') {
+            const s = toRatePercentStringFromValue(v);
+            if (s != null) (obj as any).coverClickRate = s;
+          }
+        }
       }
 
       // 合并完成后统一入库(避免同一天多次 update)