Browse Source

Merge branches 'main' and 'main' of http://gitlab.pubdata.cn/hlm/multi-platform-media-manage

Ethanfly 2 days ago
parent
commit
aac86c9fe1

+ 1 - 18
server/python/platforms/xiaohongshu.py

@@ -2009,7 +2009,7 @@ class XiaohongshuPublisher(BasePublisher):
     async def get_note_base(self, cookies: str, note_id: str) -> dict:
         """
         调用创作者中心「笔记数据- note/base」接口,用于每日作品数据同步。
-        登录方式与打开后台一致:使用账号已存 Cookie,不启浏览器。
+        使用账号已存 Cookie,不启浏览器,直接带 Referer 调 note/base
         """
         import aiohttp
 
@@ -2020,13 +2020,6 @@ class XiaohongshuPublisher(BasePublisher):
         cookie_list = self.parse_cookies(cookies)
         cookie_dict = {c.get("name") or "": c.get("value") or "" for c in cookie_list if c.get("name")}
 
-        session_headers = {
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
-            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
-            "Accept-Encoding": "gzip, deflate, br",
-            "Connection": "keep-alive",
-        }
         api_headers = {
             "Accept": "application/json, text/plain, */*",
             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
@@ -2036,19 +2029,9 @@ class XiaohongshuPublisher(BasePublisher):
             "Connection": "keep-alive",
         }
 
-        warmup_url = "https://creator.xiaohongshu.com/statistics/account/v2"
         api_url = f"https://creator.xiaohongshu.com/api/galaxy/creator/datacenter/note/base?note_id={note_id}"
 
         async with aiohttp.ClientSession(cookies=cookie_dict) as session:
-            try:
-                await session.get(
-                    warmup_url,
-                    headers=session_headers,
-                    timeout=aiohttp.ClientTimeout(total=20),
-                )
-            except Exception as e:
-                print(f"[{self.platform_name}] warmup statistics/account/v2 failed (non-fatal): {e}")
-
             async with session.get(
                 api_url,
                 headers=api_headers,

+ 65 - 1
server/src/services/AccountService.ts

@@ -1,4 +1,4 @@
-import { AppDataSource, PlatformAccount, AccountGroup } from '../models/index.js';
+import { AppDataSource, PlatformAccount, AccountGroup } from '../models/index.js';
 import { AppError } from '../middleware/error.js';
 import { ERROR_CODES, HTTP_STATUS } from '@media-manager/shared';
 import type {
@@ -245,6 +245,8 @@ export class AccountService {
 
     if (existing) {
       // 更新已存在的账号
+      const wasExpired = existing.status === 'expired';
+
       await this.accountRepository.update(existing.id, {
         cookieData: cookieData, // 存储原始数据(可能是加密的)
         accountName: accountInfo.accountName,
@@ -264,6 +266,16 @@ export class AccountService {
         logger.warn(`[addAccount] Background refresh failed for existing account ${existing.id}:`, err);
       });
 
+      // 账号从 expired 恢复为 active:异步补齐最近 30 天用户每日数据
+      if (wasExpired && updated && updated.status === 'active') {
+        this.backfillDailyStatisticsForReactivatedAccountAsync(updated).catch(err => {
+          logger.warn(
+            `[addAccount] Daily statistics backfill failed for reactivated account ${updated.id}:`,
+            err
+          );
+        });
+      }
+
       return this.formatAccount(updated!);
     }
 
@@ -363,6 +375,46 @@ export class AccountService {
     }
   }
 
+  /**
+   * 账号从失效(expired)恢复为 active 时,为该账号补齐最近 30 天的用户每日数据(user_day_statistics)
+   * 注意:仅在状态从 expired → active 的切换时触发,且按平台调用对应的单账号导入入口
+   */
+  private async backfillDailyStatisticsForReactivatedAccountAsync(
+    account: PlatformAccount
+  ): Promise<void> {
+    const platform = account.platform as PlatformType;
+
+    // 延迟几秒,避免与前端后续操作/其他浏览器任务抢占资源
+    await new Promise((resolve) => setTimeout(resolve, 3000));
+
+    try {
+      if (platform === 'xiaohongshu') {
+        await XiaohongshuAccountOverviewImportService.runDailyImportForAccount(account.id);
+      } else if (platform === 'douyin') {
+        await DouyinAccountOverviewImportService.runDailyImportForAccount(account.id);
+      } else if (platform === 'baijiahao') {
+        await BaijiahaoContentOverviewImportService.runDailyImportForAccount(account.id);
+      } else if (platform === 'weixin_video') {
+        await WeixinVideoDataCenterImportService.runDailyImportForAccount(account.id);
+      } else {
+        logger.info(
+          `[AccountService] Skip daily statistics backfill for unsupported platform ${platform} (accountId=${account.id})`
+        );
+        return;
+      }
+
+      logger.info(
+        `[AccountService] Completed daily statistics backfill for reactivated account ${account.id} (${platform})`
+      );
+    } catch (error) {
+      logger.warn(
+        `[AccountService] Daily statistics backfill failed for reactivated account ${account.id} (${platform}):`,
+        error
+      );
+      // 出错不影响账号激活本身
+    }
+  }
+
   async updateAccount(
     userId: number,
     accountId: number,
@@ -411,6 +463,7 @@ export class AccountService {
     }
 
     const platform = account.platform as PlatformType;
+    const wasExpired = account.status === 'expired';
     const updateData: Partial<PlatformAccount> = {
       updatedAt: new Date(),
     };
@@ -597,6 +650,17 @@ export class AccountService {
 
     const updated = await this.accountRepository.findOne({ where: { id: accountId } });
 
+    // 账号从 expired 恢复为 active:异步补齐最近 30 天用户每日数据
+    const newStatus = updated?.status ?? account.status;
+    if (wasExpired && newStatus === 'active' && updated) {
+      this.backfillDailyStatisticsForReactivatedAccountAsync(updated).catch((error) => {
+        logger.warn(
+          `[AccountService] Daily statistics backfill failed after refresh for reactivated account ${updated.id}:`,
+          error
+        );
+      });
+    }
+
     // 刷新账号时不再写入 user_day_statistics,仅保留:每天定时任务 + 添加账号时的 initStatisticsForNewAccountAsync
     // if (updated) {
     //   try {

+ 14 - 0
server/src/services/BaijiahaoContentOverviewImportService.ts

@@ -535,6 +535,20 @@ export class BaijiahaoContentOverviewImportService {
   }
 
   /**
+   * 单账号入口:仅为指定百家号账号执行近30天「内容分析-基础数据」+粉丝数据导入(用于账号从失效恢复为 active 时补数)
+   */
+  static async runDailyImportForAccount(accountId: number): Promise<void> {
+    const svc = new BaijiahaoContentOverviewImportService();
+    const account = await svc.accountRepository.findOne({
+      where: { id: accountId, platform: 'baijiahao' as any },
+    });
+    if (!account) {
+      throw new Error(`未找到百家号账号 id=${accountId}`);
+    }
+    await svc.importAccountLast30Days(account);
+  }
+
+  /**
    * 为所有百家号账号导出“数据中心-内容分析-基础数据-近30天”并导入 user_day_statistics
    */
   async runDailyImportForAllBaijiahaoAccounts(): Promise<void> {

+ 14 - 0
server/src/services/DouyinAccountOverviewImportService.ts

@@ -266,6 +266,20 @@ export class DouyinAccountOverviewImportService {
   }
 
   /**
+   * 单账号入口:仅为指定抖音账号执行近30天账号总览导入(用于账号从失效恢复为 active 时补数)
+   */
+  static async runDailyImportForAccount(accountId: number): Promise<void> {
+    const svc = new DouyinAccountOverviewImportService();
+    const account = await svc.accountRepository.findOne({
+      where: { id: accountId, platform: 'douyin' as any },
+    });
+    if (!account) {
+      throw new Error(`未找到抖音账号 id=${accountId}`);
+    }
+    await svc.importAccountLast30Days(account);
+  }
+
+  /**
    * 为所有抖音账号导出“账号总览-短视频-数据表现-近30天”并导入 user_day_statistics
    */
   async runDailyImportForAllDouyinAccounts(): Promise<void> {

+ 14 - 0
server/src/services/WeixinVideoDataCenterImportService.ts

@@ -589,6 +589,20 @@ export class WeixinVideoDataCenterImportService {
     await svc.runDailyImportForAllWeixinVideoAccounts();
   }
 
+  /**
+   * 单账号入口:仅为指定视频号账号执行近30天「数据中心-各子菜单-增长详情」导入(用于账号从失效恢复为 active 时补数)
+   */
+  static async runDailyImportForAccount(accountId: number): Promise<void> {
+    const svc = new WeixinVideoDataCenterImportService();
+    const account = await svc.accountRepository.findOne({
+      where: { id: accountId, platform: 'weixin_video' as any },
+    });
+    if (!account) {
+      throw new Error(`未找到视频号账号 id=${accountId}`);
+    }
+    await svc.importAccountLast30Days(account);
+  }
+
   async runDailyImportForAllWeixinVideoAccounts(): Promise<void> {
     await ensureDir(this.downloadDir);
     const accounts = await this.accountRepository.find({ where: { platform: 'weixin_video' as any } });

+ 28 - 0
server/src/services/XiaohongshuAccountOverviewImportService.ts

@@ -337,6 +337,34 @@ export class XiaohongshuAccountOverviewImportService {
   }
 
   /**
+   * 单账号入口:仅为指定账号执行近30日账号概览导入(用于账号从失效恢复为 active 时补数)
+   */
+  static async runDailyImportForAccount(accountId: number): Promise<void> {
+    const svc = new XiaohongshuAccountOverviewImportService();
+    const account = await svc.accountRepository.findOne({
+      where: { id: accountId, platform: 'xiaohongshu' as any },
+    });
+    if (!account) {
+      throw new Error(`未找到小红书账号 id=${accountId}`);
+    }
+    await svc.importAccountLast30Days(account);
+  }
+
+  /**
+   * 单账号入口:仅为指定账号执行近30日账号概览导入(用于账号从失效恢复为 active 时补数)
+   */
+  static async runDailyImportForAccount(accountId: number): Promise<void> {
+    const svc = new XiaohongshuAccountOverviewImportService();
+    const account = await svc.accountRepository.findOne({
+      where: { id: accountId, platform: 'xiaohongshu' as any },
+    });
+    if (!account) {
+      throw new Error(`未找到小红书账号 id=${accountId}`);
+    }
+    await svc.importAccountLast30Days(account);
+  }
+
+  /**
    * 为所有小红书账号导出“观看数据-近30日”并导入 user_day_statistics
    */
   async runDailyImportForAllXhsAccounts(): Promise<void> {

+ 94 - 34
server/src/services/XiaohongshuWorkNoteStatisticsImportService.ts

@@ -3,7 +3,6 @@ import { AppDataSource, PlatformAccount, Work } from '../models/index.js';
 import { logger } from '../utils/logger.js';
 import { WorkDayStatisticsService } from './WorkDayStatisticsService.js';
 import { AccountService } from './AccountService.js';
-import { getPythonServiceBaseUrl } from './PythonServiceConfigService.js';
 import type { ProxyConfig } from '@media-manager/shared';
 import { BrowserManager } from '../automation/browser.js';
 import { In } from 'typeorm';
@@ -98,32 +97,41 @@ function normalizeSameSite(value: unknown): 'Strict' | 'Lax' | 'None' | undefine
   return undefined;
 }
 
+/** 与进入后台/每日用户数据一致:每条 cookie 仅保留 url 或 domain 其一,且 domain 仅在有有效值时设置,避免 Playwright addCookies 报错 */
 function parseCookiesFromAccount(cookieData: string | null): PlaywrightCookie[] {
   if (!cookieData) return [];
   const raw = cookieData.trim();
   if (!raw) return [];
 
-  try {
-    const parsed = JSON.parse(raw) as (PlaywrightCookie & { sameSite?: unknown })[];
-    if (Array.isArray(parsed)) {
-      return parsed.map((c) => {
-        const sameSite = normalizeSameSite(c.sameSite);
-        const out: PlaywrightCookie = {
-          name: String(c.name ?? '').trim(),
-          value: String(c.value ?? '').trim(),
-          url: c.url || 'https://creator.xiaohongshu.com',
-        };
-        if (c.domain != null) out.domain = String(c.domain);
-        if (c.path != null) out.path = String(c.path);
-        if (c.expires != null && Number.isFinite(Number(c.expires))) out.expires = Number(c.expires);
-        if (typeof c.httpOnly === 'boolean') out.httpOnly = c.httpOnly;
-        if (typeof c.secure === 'boolean') out.secure = c.secure;
-        if (sameSite) out.sameSite = sameSite;
-        return out;
-      }).filter((c) => c.name.length > 0);
+  if (raw.startsWith('[') || raw.startsWith('{')) {
+    try {
+      const parsed = JSON.parse(raw);
+      const arr = Array.isArray(parsed) ? parsed : (parsed?.cookies ? parsed.cookies : []);
+      if (!Array.isArray(arr)) return [];
+      return arr
+        .map((c: Record<string, unknown>) => {
+          const name = String(c?.name ?? '').trim();
+          const value = String(c?.value ?? '').trim();
+          if (!name) return null;
+          const domain = c?.domain ? String(c.domain).trim() : undefined;
+          const pathVal = c?.path ? String(c.path) : '/';
+          const url = !domain ? 'https://creator.xiaohongshu.com' : undefined;
+          const sameSite = normalizeSameSite(c?.sameSite);
+          return {
+            name,
+            value,
+            ...(domain ? { domain } : { url }),
+            path: pathVal,
+            expires: typeof c?.expires === 'number' ? c.expires : undefined,
+            httpOnly: typeof c?.httpOnly === 'boolean' ? c.httpOnly : undefined,
+            secure: typeof c?.secure === 'boolean' ? c.secure : undefined,
+            ...(sameSite ? { sameSite } : {}),
+          } as PlaywrightCookie;
+        })
+        .filter(Boolean) as PlaywrightCookie[];
+    } catch {
+      // fallthrough
     }
-  } catch {
-    // fallthrough
   }
 
   const pairs = raw.split(';').map((p) => p.trim()).filter(Boolean);
@@ -367,6 +375,9 @@ export class XiaohongshuWorkNoteStatisticsImportService {
       let totalInserted = 0;
       let totalUpdated = 0;
       const total = works.length;
+      let skippedNoNoteId = 0;
+      let skippedNoData = 0;
+      let skippedNoPatches = 0;
 
       for (let i = 0; i < works.length; i++) {
         const work = works[i];
@@ -374,15 +385,22 @@ export class XiaohongshuWorkNoteStatisticsImportService {
           options.onProgress({ index: i + 1, total, work });
         }
         const noteId = (work.platformVideoId || '').trim();
-        if (!noteId) continue;
+        if (!noteId) {
+          skippedNoNoteId++;
+          continue;
+        }
 
         try {
-          let data: NoteBaseData | null = await this.fetchNoteBaseViaPython(account, noteId);
+          // 直接走浏览器(无 Python 前置),保持与「进入后台 / 作品同步」完全一致的登录方式
+          await ensureBrowser();
+          const data: NoteBaseData | null = await this.fetchNoteBaseData(page!, noteId);
           if (!data) {
-            await ensureBrowser();
-            data = await this.fetchNoteBaseData(page!, noteId);
+            skippedNoData++;
+            logger.warn(
+              `[XHS WorkStats] Python 与浏览器均未拿到 note/base 数据,跳过。 accountId=${account.id} workId=${work.id} noteId=${noteId}`
+            );
+            continue;
           }
-          if (!data) continue;
 
           // 同步 base 顶层“汇总指标”到 works 表(用于作品列表/总览等按 work 累计口径展示)
           await this.applyWorkSnapshotFromBaseData(work.id, data).catch((e) => {
@@ -401,7 +419,14 @@ export class XiaohongshuWorkNoteStatisticsImportService {
             lastAllowed.setDate(lastAllowed.getDate() + 13); // 发布当日 + 13 天 = 共 14 天
             patches = patches.filter((p) => p.recordDate.getTime() <= lastAllowed.getTime());
           }
-          if (!patches.length) continue;
+          if (!patches.length) {
+            skippedNoPatches++;
+            const dayKeys = data?.day && typeof data.day === 'object' ? Object.keys(data.day) : [];
+            logger.warn(
+              `[XHS WorkStats] note/base 无日维度数据,跳过入库。 accountId=${account.id} workId=${work.id} noteId=${noteId} data.day keys=[${dayKeys.join(',')}]`
+            );
+            continue;
+          }
 
           const result = await this.workDayStatisticsService.saveStatisticsForDateBatch(
             patches.map((p) => ({
@@ -477,6 +502,11 @@ export class XiaohongshuWorkNoteStatisticsImportService {
         }
       }
 
+      if (skippedNoNoteId > 0 || skippedNoData > 0 || skippedNoPatches > 0) {
+        logger.info(
+          `[XHS WorkStats] accountId=${account.id} 跳过统计: 无 noteId=${skippedNoNoteId} 无数据=${skippedNoData} 无日维度=${skippedNoPatches}`
+        );
+      }
       logger.info(
         `[XHS WorkStats] accountId=${account.id} completed. inserted=${totalInserted}, updated=${totalUpdated}`
       );
@@ -492,7 +522,10 @@ export class XiaohongshuWorkNoteStatisticsImportService {
     }
   }
 
-  /** 通过 Python 调用 note/base(登录与打开后台一致:使用账号已存 Cookie) */
+  /**
+   * 通过 Python 调用 note/base(与进入后台/每日用户数据一致:仅用账号已存 Cookie,不启浏览器)。
+   * 若返回 null,会触发浏览器兜底;通常原因:Python 服务未开/地址错、Cookie 失效(创作者端 401)、或响应结构变化。
+   */
   private async fetchNoteBaseViaPython(
     account: PlatformAccount,
     noteId: string
@@ -500,7 +533,10 @@ export class XiaohongshuWorkNoteStatisticsImportService {
     const base = (await getPythonServiceBaseUrl()).replace(/\/$/, '');
     const url = `${base}/xiaohongshu/note_base`;
     const cookie = String(account.cookieData || '').trim();
-    if (!cookie) return null;
+    if (!cookie) {
+      logger.warn(`[XHS WorkStats] fetchNoteBaseViaPython: accountId=${account.id} cookie 为空`);
+      return null;
+    }
 
     const controller = new AbortController();
     const timeoutId = setTimeout(() => controller.abort(), 35_000);
@@ -512,12 +548,36 @@ export class XiaohongshuWorkNoteStatisticsImportService {
         body: JSON.stringify({ cookie, note_id: noteId }),
       });
       const text = await res.text();
-      const body = text ? (JSON.parse(text) as { data?: unknown; code?: number }) : null;
-      if (!body || typeof body !== 'object') return null;
-      const data = body.data;
-      if (!data || typeof data !== 'object') return null;
+      if (!res.ok) {
+        logger.warn(
+          `[XHS WorkStats] fetchNoteBaseViaPython: Python 返回 HTTP ${res.status} accountId=${account.id} noteId=${noteId} body=${text.slice(0, 200)}`
+        );
+        return null;
+      }
+      const body = text ? (JSON.parse(text) as { data?: unknown; code?: number; msg?: string }) : null;
+      if (!body || typeof body !== 'object') {
+        logger.warn(`[XHS WorkStats] fetchNoteBaseViaPython: 响应非 JSON accountId=${account.id} noteId=${noteId}`);
+        return null;
+      }
+      const code = body.code;
+      let data = body.data;
+      if (!data || typeof data !== 'object') {
+        logger.warn(
+          `[XHS WorkStats] fetchNoteBaseViaPython: 无 data 或 data 非对象 accountId=${account.id} noteId=${noteId} code=${code} msg=${(body as any).msg ?? ''}`
+        );
+        return null;
+      }
+      // 兼容接口返回 data.data(如 { data: { data: { day, view_count, ... } } })
+      const inner = (data as Record<string, unknown>).data;
+      if (inner && typeof inner === 'object' && (inner as Record<string, unknown>).day !== undefined) {
+        data = inner;
+      }
       return data as NoteBaseData;
-    } catch {
+    } catch (e) {
+      logger.warn(
+        `[XHS WorkStats] fetchNoteBaseViaPython: 请求异常 accountId=${account.id} noteId=${noteId}`,
+        e instanceof Error ? e.message : e
+      );
       return null;
     } finally {
       clearTimeout(timeoutId);