|
|
@@ -1,7 +1,6 @@
|
|
|
import fs from 'node:fs/promises';
|
|
|
import path from 'node:path';
|
|
|
import { chromium, type Browser } from 'playwright';
|
|
|
-import * as XLSXNS from 'xlsx';
|
|
|
import { AppDataSource, PlatformAccount } from '../models/index.js';
|
|
|
import { BrowserManager } from '../automation/browser.js';
|
|
|
import { logger } from '../utils/logger.js';
|
|
|
@@ -11,10 +10,6 @@ import type { ProxyConfig } from '@media-manager/shared';
|
|
|
import { WS_EVENTS } from '@media-manager/shared';
|
|
|
import { wsManager } from '../websocket/index.js';
|
|
|
|
|
|
-// xlsx 在 ESM 下可能挂在 default 上;这里做一次兼容兜底
|
|
|
-// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
|
-const XLSX: any = (XLSXNS as any).default ?? (XLSXNS as any);
|
|
|
-
|
|
|
type PlaywrightCookie = {
|
|
|
name: string;
|
|
|
value: string;
|
|
|
@@ -67,20 +62,14 @@ function normalizeDateText(input: unknown): Date | null {
|
|
|
return null;
|
|
|
}
|
|
|
|
|
|
-function parseChineseNumberLike(input: unknown): number | null {
|
|
|
- if (input === null || input === undefined) return null;
|
|
|
- const s = String(input).trim();
|
|
|
- if (!s) return null;
|
|
|
- // 8,077
|
|
|
- const plain = s.replace(/,/g, '');
|
|
|
- // 4.8万
|
|
|
- const wan = plain.match(/^(\d+(\.\d+)?)\s*万$/);
|
|
|
- if (wan) return Math.round(Number(wan[1]) * 10000);
|
|
|
- const yi = plain.match(/^(\d+(\.\d+)?)\s*亿$/);
|
|
|
- if (yi) return Math.round(Number(yi[1]) * 100000000);
|
|
|
- const n = Number(plain.replace(/[^\d.-]/g, ''));
|
|
|
- if (Number.isFinite(n)) return Math.round(n);
|
|
|
- return null;
|
|
|
/**
 * Formats a ratio as a percentage string with at most two decimals.
 *
 * The input is multiplied by 100, rounded to two decimal places, and trailing
 * zero decimals are trimmed (`0.125` -> `'12.5%'`, `0.5` -> `'50%'`).
 *
 * NOTE(review): assumes the incoming value is a fraction (0.05 === 5%), not an
 * already-scaled percentage — confirm against the dashboard API payload.
 *
 * @param val A number, or a non-blank numeric string.
 * @returns The formatted percentage; the bare string `'0'` (no percent sign)
 *   for an exact zero; `undefined` when `val` is missing or not a finite number.
 */
function toRatePercentStringFromValue(val: unknown): string | undefined {
  let n: number;
  if (typeof val === 'number') {
    n = val;
  } else if (typeof val === 'string' && val.trim() !== '') {
    n = Number(val);
  } else {
    // Number(null), Number(''), Number(false) and Number([]) all evaluate to 0,
    // which would turn "no data" into a real 0 rate. Treat those as missing.
    return undefined;
  }
  if (!Number.isFinite(n)) return undefined;
  if (n === 0) return '0';

  const scaled = n * 100;
  const rounded = Math.round(scaled * 100) / 100;
  const text = rounded
    .toFixed(2)
    .replace(/\.00$/, '') // 50.00 -> 50
    .replace(/(\.\d)0$/, '$1'); // 12.50 -> 12.5
  return `${text}%`;
}
|
|
|
|
|
|
function parseCookiesFromAccount(cookieData: string | null): PlaywrightCookie[] {
|
|
|
@@ -162,92 +151,56 @@ async function createBrowserForAccount(proxy: ProxyConfig | null): Promise<{ bro
|
|
|
return { browser, shouldClose: false };
|
|
|
}
|
|
|
|
|
|
-function parseDouyinExcel(
|
|
|
- filePath: string
|
|
|
-): Map<string, { recordDate: Date } & Record<string, any>> {
|
|
|
- const wb = XLSX.readFile(filePath);
|
|
|
- const result = new Map<string, { recordDate: Date } & Record<string, any>>();
|
|
|
-
|
|
|
- logger.info(
|
|
|
- `[DY Import] Excel loaded. file=${path.basename(filePath)} sheets=${wb.SheetNames.join(' | ')}`
|
|
|
- );
|
|
|
-
|
|
|
- for (const sheetName of wb.SheetNames) {
|
|
|
- const sheet = wb.Sheets[sheetName];
|
|
|
- const rows = XLSX.utils.sheet_to_json<Record<string, any>>(sheet, { defval: '' });
|
|
|
-
|
|
|
- if (!rows.length) {
|
|
|
- logger.warn(`[DY Import] Sheet empty. name=${sheetName}`);
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- const keys = Object.keys(rows[0] || {});
|
|
|
- logger.info(`[DY Import] Sheet parsed. name=${sheetName} rows=${rows.length} keys=${keys.join(',')}`);
|
|
|
-
|
|
|
- const normalizeKey = (k: string) => k.replace(/^\uFEFF/, '').trim();
|
|
|
-
|
|
|
- for (const row of rows) {
|
|
|
- const rawKeys = Object.keys(row || {});
|
|
|
- if (!rawKeys.length) continue;
|
|
|
- const keysNormalized = rawKeys.map((k) => ({ raw: k, norm: normalizeKey(k) }));
|
|
|
-
|
|
|
- // 兼容 Excel 表头带 BOM/空格:优先找包含“日期”的列作为日期列
|
|
|
- const dateKey =
|
|
|
- keysNormalized.find((k) => k.norm === '日期')?.raw ??
|
|
|
- keysNormalized.find((k) => k.norm.includes('日期'))?.raw ??
|
|
|
- keysNormalized.find((k) => k.norm.toLowerCase() === 'date')?.raw ??
|
|
|
- keysNormalized[0]!.raw;
|
|
|
-
|
|
|
- const dateVal = (row as any)[dateKey];
|
|
|
-
|
|
|
- const d = normalizeDateText(dateVal);
|
|
|
- if (!d) continue;
|
|
|
-
|
|
|
- const key = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, '0')}-${String(d.getDate()).padStart(2, '0')}`;
|
|
|
- if (!result.has(key)) result.set(key, { recordDate: d });
|
|
|
- const obj = result.get(key)!;
|
|
|
-
|
|
|
- // 抖音导出的 Excel 通常是两列:日期 + 指标名(如“作品分享/净增粉丝/作品点赞/播放量...”)
|
|
|
- // 因此优先按“第二列标题”做自动映射,避免漏掉“沈凉音”这种全量导出格式。
|
|
|
- const metricKeyRaw = keysNormalized.find((k) => k.raw !== dateKey)?.raw;
|
|
|
- if (!metricKeyRaw) continue;
|
|
|
- const metricKey = normalizeKey(metricKeyRaw);
|
|
|
-
|
|
|
- // 显式排除:主页访问 / 取关粉丝
|
|
|
- if (metricKey.includes('主页访问') || metricKey.includes('取关粉丝')) continue;
|
|
|
// One per-day sample inside a metric's `trends` array of the overview/dashboard response.
// Every field is optional because the payload is parsed from untyped JSON.
+type DashboardMetricTrendPoint = {
|
|
|
+ date_time?: string; // YYYYMMDD
|
|
|
+ value?: number; // aggregate value; this is the one pickTrendValue prefers
|
|
|
+ douyin_value?: number; // used by pickTrendValue as the fallback when `value` is not a number
|
|
|
+ xigua_value?: number; // not read by the visible code; presumably a per-app breakdown — TODO confirm
|
|
|
+ yumme_value?: number; // not read by the visible code; presumably a per-app breakdown — TODO confirm
|
|
|
+ change_rate?: number; // not read by the visible code
|
|
|
+};
|
|
|
|
|
|
- const rawVal = (row as any)[metricKeyRaw];
|
|
|
- if (rawVal === undefined || rawVal === null) continue;
|
|
|
// One metric entry of the overview/dashboard response.
+type DashboardMetricItem = {
|
|
|
+ english_metric_name?: string; // machine key the importer switches on (e.g. 'play_cnt', 'digg_cnt', 'total_fans_cnt')
|
|
|
+ metric_name?: string; // not read by the visible code; presumably the display label — TODO confirm
|
|
|
+ metric_value?: number; // not read by the visible code
|
|
|
+ trends?: DashboardMetricTrendPoint[]; // per-day points; metrics with no points are skipped by the importer
|
|
|
+};
|
|
|
|
|
|
- // 1)封面点击率:字符串百分比直接存
|
|
|
- if (metricKey.includes('封面点击率')) {
|
|
|
- const s = String(rawVal).trim();
|
|
|
- if (s) (obj as any).coverClickRate = s;
|
|
|
- continue;
|
|
|
- }
|
|
|
// Envelope of the overview/dashboard JSON response as consumed by the importer.
+type DashboardResponse = {
|
|
|
+ status_code?: number; // the importer treats anything other than 0 as a failure and throws
|
|
|
+ status_msg?: string; // included in the thrown error message on failure
|
|
|
+ metrics?: DashboardMetricItem[]; // treated as an empty list when missing or not an array
|
|
|
+};
|
|
|
|
|
|
- // 2)其余按数值解析
|
|
|
- const n = parseChineseNumberLike(rawVal);
|
|
|
- if (typeof n !== 'number') continue;
|
|
|
-
|
|
|
- if (metricKey.includes('播放')) (obj as any).playCount = n;
|
|
|
- else if (metricKey.includes('点赞')) (obj as any).likeCount = n;
|
|
|
- else if (metricKey.includes('评论')) (obj as any).commentCount = n;
|
|
|
- else if (metricKey.includes('分享')) (obj as any).shareCount = n;
|
|
|
- else if (metricKey.includes('净增粉丝') || metricKey.includes('新增粉丝')) (obj as any).fansIncrease = n;
|
|
|
- // 总粉丝数/总粉丝量:入库 fans_count
|
|
|
- else if (metricKey.includes('总粉丝')) (obj as any).fansCount = n;
|
|
|
- }
|
|
|
- }
|
|
|
/**
 * Parses a compact `YYYYMMDD` value into a local-time `Date` at 00:00:00.000.
 *
 * @param ymd The raw value (string or number); it is stringified and trimmed first.
 * @returns The parsed date, or `null` when the input is not exactly eight digits
 *   or does not name a real calendar day (e.g. `20230229`, `20241301`).
 */
function parseYmdCompactToDate(ymd: unknown): Date | null {
  const text = String(ymd || '').trim();
  const match = /^(\d{4})(\d{2})(\d{2})$/.exec(text);
  if (!match) return null;

  const year = Number(match[1]);
  const month = Number(match[2]);
  const day = Number(match[3]);
  if (!year || !month || !day) return null;

  const date = new Date(year, month - 1, day);
  // The Date constructor never rejects out-of-range parts: it rolls them over
  // (Feb 30 -> Mar 1/2, month 13 -> January of the next year) and remaps years
  // 0-99 to 1900-1999. Comparing the parts back detects every such case, so a
  // malformed date is dropped instead of being stored under the wrong day.
  if (date.getFullYear() !== year || date.getMonth() !== month - 1 || date.getDate() !== day) {
    return null;
  }
  date.setHours(0, 0, 0, 0);
  return date;
}
|
|
|
|
|
|
- return result;
|
|
|
/**
 * Chooses the number to store for one trend point.
 *
 * The aggregate `value` is preferred; `douyin_value` is the fallback. A
 * candidate only counts when it is a finite number, so an unusable `value`
 * no longer blocks the fallback.
 *
 * @param pt The trend point from the dashboard response.
 * @returns The first finite candidate, or `undefined` when neither is usable.
 */
function pickTrendValue(pt: DashboardMetricTrendPoint): number | undefined {
  // Optional chaining keeps this safe if a null entry slips into the parsed JSON array.
  for (const candidate of [pt?.value, pt?.douyin_value]) {
    if (typeof candidate === 'number' && Number.isFinite(candidate)) return candidate;
  }
  return undefined;
}
|
|
|
|
|
|
export class DouyinAccountOverviewImportService {
|
|
|
private accountRepository = AppDataSource.getRepository(PlatformAccount);
|
|
|
private userDayStatisticsService = new UserDayStatisticsService();
|
|
|
|
|
|
- private downloadDir = path.resolve(process.cwd(), 'tmp', 'douyin-account-overview');
|
|
|
private stateDir = path.resolve(process.cwd(), 'tmp', 'douyin-storage-state');
|
|
|
|
|
|
private getStatePath(accountId: number) {
|
|
|
@@ -316,8 +269,6 @@ export class DouyinAccountOverviewImportService {
|
|
|
* 为所有抖音账号导出“账号总览-短视频-数据表现-近30天”并导入 user_day_statistics
|
|
|
*/
|
|
|
async runDailyImportForAllDouyinAccounts(): Promise<void> {
|
|
|
- await ensureDir(this.downloadDir);
|
|
|
-
|
|
|
const accounts = await this.accountRepository.find({
|
|
|
where: { platform: 'douyin' as any },
|
|
|
});
|
|
|
@@ -463,94 +414,87 @@ export class DouyinAccountOverviewImportService {
|
|
|
}
|
|
|
await page.waitForTimeout(1200);
|
|
|
|
|
|
- // 逐个指标导出(排除:主页访问 / 取关粉丝)
|
|
|
- // 说明:抖音导出通常是“日期 + 指标”两列,每次只能导出当前选中的指标
|
|
|
- // 注意:抖音 UI 上“总粉丝”文案可能是「总粉丝量」而不是「总粉丝数」
|
|
|
- const metricsToExport: Array<{ name: string; candidates: string[] }> = [
|
|
|
- { name: '播放量', candidates: ['播放量'] },
|
|
|
- { name: '作品点赞', candidates: ['作品点赞', '点赞'] },
|
|
|
- { name: '作品评论', candidates: ['作品评论', '评论'] },
|
|
|
- { name: '作品分享', candidates: ['作品分享', '分享'] },
|
|
|
- { name: '封面点击率', candidates: ['封面点击率'] },
|
|
|
- { name: '净增粉丝', candidates: ['净增粉丝', '新增粉丝'] },
|
|
|
- { name: '总粉丝量', candidates: ['总粉丝量', '总粉丝数', '粉丝总量'] },
|
|
|
- ];
|
|
|
-
|
|
|
let totalInserted = 0;
|
|
|
let totalUpdated = 0;
|
|
|
- let mergedDays = new Map<string, { recordDate: Date } & Record<string, any>>();
|
|
|
- const savedExcelPaths: string[] = [];
|
|
|
-
|
|
|
- const clickMetric = async (metric: { name: string; candidates: string[] }) => {
|
|
|
- // 先精确匹配,失败后用包含匹配(适配 UI 文案差异)
|
|
|
- for (const c of metric.candidates) {
|
|
|
- const locatorExact = page.getByText(c, { exact: true }).first();
|
|
|
- const exactCount = await locatorExact.count().catch(() => 0);
|
|
|
- if (exactCount > 0) {
|
|
|
- await locatorExact.click().catch(() => undefined);
|
|
|
- await page.waitForTimeout(800);
|
|
|
- return c;
|
|
|
- }
|
|
|
- }
|
|
|
- for (const c of metric.candidates) {
|
|
|
- const locatorFuzzy = page.getByText(c, { exact: false }).first();
|
|
|
- const fuzzyCount = await locatorFuzzy.count().catch(() => 0);
|
|
|
- if (fuzzyCount > 0) {
|
|
|
- await locatorFuzzy.click().catch(() => undefined);
|
|
|
- await page.waitForTimeout(800);
|
|
|
- return c;
|
|
|
- }
|
|
|
- }
|
|
|
- logger.warn(`[DY Import] metric not found on page. accountId=${account.id} metric=${metric.name}`);
|
|
|
- return null;
|
|
|
- };
|
|
|
+ const apiUrl = 'https://creator.douyin.com/janus/douyin/creator/data/overview/dashboard';
|
|
|
|
|
|
- for (const metric of metricsToExport) {
|
|
|
- logger.info(`[DY Import] accountId=${account.id} exporting metric: ${metric.name}...`);
|
|
|
- await clickMetric(metric);
|
|
|
-
|
|
|
- const [download] = await Promise.all([
|
|
|
- page.waitForEvent('download', { timeout: 60_000 }),
|
|
|
- page.getByText('导出数据', { exact: true }).first().click(),
|
|
|
- ]);
|
|
|
-
|
|
|
- const filename = `${account.id}_${Date.now()}_${download.suggestedFilename()}`;
|
|
|
- const filePath = path.join(this.downloadDir, filename);
|
|
|
- await download.saveAs(filePath);
|
|
|
- // 保留 Excel 不删除,便于核对数据;路径打日志方便查看
|
|
|
- const absolutePath = path.resolve(filePath);
|
|
|
- savedExcelPaths.push(absolutePath);
|
|
|
- logger.info(
|
|
|
- `[DY Import] Excel saved (${metric.name}): ${absolutePath}`
|
|
|
- );
|
|
|
+ logger.info(`[DY Import] accountId=${account.id} fetch dashboard (POST recent_days=30)...`);
|
|
|
+
|
|
|
+ // 优先监听页面自身是否会发起该请求;若没有,则在页面上下文里手动 fetch(浏览器自动带 cookie)
|
|
|
+ const responsePromise = page
|
|
|
+ .waitForResponse(
|
|
|
+ (res) => res.request().method() === 'POST' && res.url().includes('/janus/douyin/creator/data/overview/dashboard'),
|
|
|
+ { timeout: 8000 }
|
|
|
+ )
|
|
|
+ .catch(() => null);
|
|
|
|
|
|
+ const evalPromise = page.evaluate(async (url) => {
|
|
|
try {
|
|
|
- const perDay = parseDouyinExcel(filePath);
|
|
|
- // 合并不同指标到同一日期 patch(与小红书维度一致)
|
|
|
- for (const [k, v] of perDay.entries()) {
|
|
|
- if (!mergedDays.has(k)) mergedDays.set(k, { recordDate: v.recordDate });
|
|
|
- const base = mergedDays.get(k)!;
|
|
|
- Object.assign(base, v);
|
|
|
- }
|
|
|
- logger.info(
|
|
|
- `[DY Import] metric exported & parsed. accountId=${account.id} metric=${metric.name} file=${path.basename(filePath)} days=${perDay.size}`
|
|
|
- );
|
|
|
- } finally {
|
|
|
- // 默认导入后删除 Excel,避免磁盘堆积;仅在显式 KEEP_DY_XLSX=true 时保留(用于调试)
|
|
|
- if (process.env.KEEP_DY_XLSX === 'true') {
|
|
|
- logger.warn(`[DY Import] KEEP_DY_XLSX=true, keep file: ${filePath}`);
|
|
|
- } else {
|
|
|
- await fs.unlink(filePath).catch(() => undefined);
|
|
|
- }
|
|
|
+ const r = await fetch(url, {
|
|
|
+ method: 'POST',
|
|
|
+ credentials: 'include',
|
|
|
+ headers: {
|
|
|
+ accept: 'application/json, text/plain, */*',
|
|
|
+ 'content-type': 'application/json',
|
|
|
+ },
|
|
|
+ body: JSON.stringify({ recent_days: 30 }),
|
|
|
+ });
|
|
|
+ const json = await r.json().catch(() => null);
|
|
|
+ return json;
|
|
|
+ } catch (e: any) {
|
|
|
+ return { status_code: -1, status_msg: String(e?.message || e) };
|
|
|
}
|
|
|
+ }, apiUrl);
|
|
|
+
|
|
|
+ const [res, evalJson] = await Promise.all([responsePromise, evalPromise]);
|
|
|
+ const body = ((res ? await res.json().catch(() => null) : null) ?? evalJson ?? null) as DashboardResponse | null;
|
|
|
+
|
|
|
+ if (!body || typeof body !== 'object') {
|
|
|
+ throw new Error('overview/dashboard 响应不是 JSON');
|
|
|
+ }
|
|
|
+ if (Number(body.status_code) !== 0) {
|
|
|
+ throw new Error(`overview/dashboard 返回非成功: code=${body.status_code} msg=${body.status_msg || ''}`);
|
|
|
}
|
|
|
|
|
|
- // 汇总:本账号导出的 7 个 Excel 已解析
|
|
|
- logger.info(
|
|
|
- `[DY Import] accountId=${account.id} 共 ${savedExcelPaths.length} 个 Excel 已解析`
|
|
|
- );
|
|
|
- if (savedExcelPaths.length !== 7) {
|
|
|
- logger.warn(`[DY Import] accountId=${account.id} 预期 7 个 Excel,实际 ${savedExcelPaths.length} 个`);
|
|
|
+ const metrics = Array.isArray(body.metrics) ? body.metrics : [];
|
|
|
+ if (!metrics.length) {
|
|
|
+ logger.warn(`[DY Import] dashboard metrics empty. accountId=${account.id}`);
|
|
|
+ }
|
|
|
+
|
|
|
+ const mergedDays = new Map<string, { recordDate: Date } & Record<string, any>>();
|
|
|
+
|
|
|
+ const setDay = (d: Date) => {
|
|
|
+ const key = `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, '0')}-${String(d.getDate()).padStart(2, '0')}`;
|
|
|
+ if (!mergedDays.has(key)) mergedDays.set(key, { recordDate: d });
|
|
|
+ return mergedDays.get(key)!;
|
|
|
+ };
|
|
|
+
|
|
|
+ for (const m of metrics) {
|
|
|
+ const en = String(m?.english_metric_name || '').trim();
|
|
|
+ const trends = Array.isArray(m?.trends) ? m.trends : [];
|
|
|
+ if (!en || !trends.length) continue;
|
|
|
+
|
|
|
+ for (const pt of trends) {
|
|
|
+ const d = parseYmdCompactToDate(pt?.date_time);
|
|
|
+ if (!d) continue;
|
|
|
+ const obj = setDay(d);
|
|
|
+ const v = pickTrendValue(pt);
|
|
|
+ if (v === undefined) continue;
|
|
|
+
|
|
|
+ // 显式排除:主页访问 / 取关粉丝(库里没有对应字段)
|
|
|
+ if (en === 'homepage_view_cnt' || en === 'cancel_fans_cnt') continue;
|
|
|
+
|
|
|
+ if (en === 'total_fans_cnt') (obj as any).fansCount = Math.round(v);
|
|
|
+ else if (en === 'play_cnt') (obj as any).playCount = Math.round(v);
|
|
|
+ else if (en === 'digg_cnt') (obj as any).likeCount = Math.round(v);
|
|
|
+ else if (en === 'comment_cnt') (obj as any).commentCount = Math.round(v);
|
|
|
+ else if (en === 'share_count') (obj as any).shareCount = Math.round(v);
|
|
|
+ else if (en === 'net_fans_cnt') (obj as any).fansIncrease = Math.round(v);
|
|
|
+ else if (en === 'cover_click_ratio') {
|
|
|
+ const s = toRatePercentStringFromValue(v);
|
|
|
+ if (s != null) (obj as any).coverClickRate = s;
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
// 合并完成后统一入库(避免同一天多次 update)
|