Просмотр исходного кода

fix: 修复作品管理相关Bug (#6071,#6073,#6074,#6085)

ethanfly 3 дней назад
Родитель
Коммит
26184337b6

+ 8 - 1
client/src/views/Works/index.vue

@@ -544,11 +544,18 @@ async function loadWorks() {
   } finally {
     loading.value = false;
   }
+  // 同步更新统计数据(应用当前筛选条件)
+  loadStats();
 }
 
 async function loadStats() {
   try {
-    const result = await request.get('/api/works/stats') as WorkStats;
+    const params: Record<string, unknown> = {};
+    if (filter.accountId) params.accountId = filter.accountId;
+    if (filter.platform) params.platform = filter.platform;
+    if (filter.status) params.status = filter.status;
+    if (filter.keyword) params.keyword = filter.keyword;
+    const result = await request.get('/api/works/stats', { params }) as WorkStats;
     if (result) {
       stats.value = result;
     }

+ 2 - 0
server/python/app.py

@@ -1276,6 +1276,8 @@ def get_works():
         # 执行获取作品
         if platform == "xiaohongshu" and auto_paging and hasattr(publisher, "get_all_works"):
             result = asyncio.run(publisher.get_all_works(cookie_str))
+        elif platform == "baijiahao" and auto_paging and hasattr(publisher, "get_all_works"):
+            result = asyncio.run(publisher.get_all_works(cookie_str))
         else:
             result = asyncio.run(publisher.run_get_works(cookie_str, page, page_size))
         

+ 263 - 1
server/python/platforms/baijiahao.py

@@ -3018,7 +3018,269 @@ class BaijiahaoPublisher(BasePublisher):
             has_more=has_more,
             next_page=next_page
         )
-    
+
+    async def get_all_works(self, cookies: str) -> WorksResult:
+        """Fetch the full Baijiahao works list with automatic paging, reusing one browser instance.
+        Avoids starting a new browser per page (slow, may trigger risk control). Returns a WorksResult: success=True with the works,
+        also when an exception hits after some works were collected; else success=False with error. The browser is not closed here (TODO confirm the caller does).
+        """
+        import re
+
+        print(f"\n{'='*60}")
+        print(f"[{self.platform_name}] 获取全部作品列表(自动分页)")
+        print(f"{'='*60}")
+
+        all_works: List[WorkItem] = []
+        seen_ids = set()
+        total = 0
+        current_page = 1
+        page_size = 20
+        max_pages = 50  # hard cap: at most 50 pages are requested
+
+        try:
+            cookie_list = self.parse_cookies(cookies)
+            await self.init_browser()
+            await self.set_cookies(cookie_list)
+
+            if not self.page:
+                raise Exception("Page not initialized")
+
+            # Open the content-management page to establish the session and extract the token
+            content_url = (
+                "https://baijiahao.baidu.com/builder/rc/content"
+                f"?currentPage={current_page}&pageSize={page_size}"
+                "&search=&type=&collection=&startDate=&endDate="
+            )
+            await self.page.goto(content_url, wait_until="domcontentloaded", timeout=60000)
+            await asyncio.sleep(3)
+
+            # Check login state: landing on a passport/login URL is treated as an expired cookie
+            current_url = self.page.url
+            if "passport.baidu.com" in current_url or "login" in current_url:
+                raise Exception("Cookie 已过期,请重新登录百家号账号")
+
+            # Extract the token: first JWT-like value in localStorage, sessionStorage, a <meta> tag, or a global state object
+            token = await self.page.evaluate(
+                """
+                () => {
+                  const isJwtLike = (v) => {
+                    if (!v || typeof v !== 'string') return false;
+                    const s = v.trim();
+                    if (s.length < 60) return false;
+                    const parts = s.split('.');
+                    if (parts.length !== 3) return false;
+                    return parts.every(p => /^[A-Za-z0-9_-]+$/.test(p) && p.length > 10);
+                  };
+
+                  const pickFromStorage = (storage) => {
+                    try {
+                      const keys = Object.keys(storage || {});
+                      for (const k of keys) {
+                        const v = storage.getItem(k);
+                        if (isJwtLike(v)) return v;
+                      }
+                    } catch {}
+                    return "";
+                  };
+
+                  let t = pickFromStorage(window.localStorage);
+                  if (t) return t;
+                  t = pickFromStorage(window.sessionStorage);
+                  if (t) return t;
+
+                  const meta = document.querySelector('meta[name="token"], meta[name="bjh-token"]');
+                  const metaToken = meta && meta.getAttribute('content');
+                  if (isJwtLike(metaToken)) return metaToken;
+
+                  const candidates = [
+                    (window.__INITIAL_STATE__ && window.__INITIAL_STATE__.token) || "",
+                    (window.__PRELOADED_STATE__ && window.__PRELOADED_STATE__.token) || "",
+                    (window.__NUXT__ && window.__NUXT__.state && window.__NUXT__.state.token) || "",
+                  ];
+                  for (const c of candidates) {
+                    if (isJwtLike(c)) return c;
+                  }
+
+                  return "";
+                }
+                """
+            )
+
+            if not token:
+                html = await self.page.content()
+                m = re.search(r'([A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,})', html)
+                if m:
+                    token = m.group(1)
+
+            if not token:
+                raise Exception("未能从页面提取 token(可能未登录或触发风控),请重新登录百家号账号后再试")
+
+            print(f"[{self.platform_name}] ✓ Token 提取成功")
+
+            def _pick_cover(item: dict) -> str:
+                cover = item.get("crosswise_cover") or item.get("vertical_cover") or ""
+                if cover:
+                    return cover
+                raw = item.get("cover_images") or ""
+                try:
+                    parsed = json.loads(raw) if isinstance(raw, str) else raw
+                    if isinstance(parsed, list) and parsed:
+                        first = parsed[0]
+                        if isinstance(first, dict):
+                            return first.get("src") or first.get("ori_src") or ""
+                        if isinstance(first, str):
+                            return first
+                except Exception:
+                    pass
+                return ""
+
+            def _pick_duration(item: dict) -> int:
+                for k in ("rmb_duration", "duration", "long"):
+                    try:
+                        v = int(item.get(k) or 0)
+                        if v > 0:
+                            return v
+                    except Exception:
+                        pass
+                ex = item.get("displaytype_exinfo") or ""
+                try:
+                    exj = json.loads(ex) if isinstance(ex, str) and ex else (ex if isinstance(ex, dict) else {})
+                    ugc = (exj.get("ugcvideo") or {}) if isinstance(exj, dict) else {}
+                    vi = ugc.get("video_info") or {}
+                    v = int(vi.get("durationInSecond") or ugc.get("long") or 0)
+                    return v if v > 0 else 0
+                except Exception:
+                    return 0
+
+            def _pick_status(item: dict) -> str:
+                qs = str(item.get("quality_status") or "").lower()
+                st = str(item.get("status") or "").lower()
+                if qs == "rejected" or "reject" in st:
+                    return "rejected"
+                if st in ("draft", "unpublish", "unpublished"):
+                    return "draft"
+                return "published"
+
+            # Paging loop: one list-API request per page, issued from inside the already-open page
+            for page_iter in range(max_pages):
+                page_num = page_iter + 1  # Baijiahao currentPage is 1-based
+                api_url = (
+                    "https://baijiahao.baidu.com/pcui/article/lists"
+                    f"?currentPage={page_num}"
+                    f"&pageSize={page_size}"
+                    "&search=&type=&collection=&startDate=&endDate="
+                    "&clearBeforeFetch=false"
+                    "&dynamic=1"
+                )
+
+                resp = await self.page.evaluate(
+                    """
+                    async ({ url, token }) => {
+                      const r = await fetch(url, {
+                        method: 'GET',
+                        credentials: 'include',
+                        headers: {
+                          'accept': 'application/json, text/plain, */*',
+                          ...(token ? { token } : {}),
+                        },
+                      });
+                      const text = await r.text();
+                      return { ok: r.ok, status: r.status, text };
+                    }
+                    """,
+                    {"url": api_url, "token": token},
+                )
+
+                if not resp or not resp.get("ok"):
+                    print(f"[{self.platform_name}] 第 {page_num} 页请求失败: HTTP {resp.get('status') if isinstance(resp, dict) else 'unknown'}")
+                    break
+
+                api_result = json.loads(resp.get("text") or "{}")
+                errno = api_result.get("errno", -1)
+
+                if errno != 0:
+                    errmsg = api_result.get("errmsg", "unknown error")
+                    print(f"[{self.platform_name}] 第 {page_num} 页接口错误: errno={errno}, errmsg={errmsg}")
+                    if errno in (110, 20040001):
+                        raise Exception("百家号未登录或 Cookie/token 失效,请重新登录后再同步")
+                    # Non-login error: stop paging. NOTE(review): on page 1 this ends in success=True with an empty list - confirm intended
+                    break
+
+                data = api_result.get("data", {}) or {}
+                items = data.get("list", []) or []
+                page_info = data.get("page", {}) or {}
+
+                if page_iter == 0:
+                    total = int(page_info.get("totalCount", 0) or 0)
+                    print(f"[{self.platform_name}] 作品总数: {total}")
+
+                new_count = 0
+                for item in items:
+                    work_id = str(item.get("nid") or item.get("feed_id") or item.get("article_id") or item.get("id") or "")
+                    if not work_id or work_id in seen_ids:
+                        continue
+                    seen_ids.add(work_id)
+                    new_count += 1
+                    all_works.append(
+                        WorkItem(
+                            work_id=work_id,
+                            title=str(item.get("title") or ""),
+                            cover_url=_pick_cover(item),
+                            video_url=str(item.get("url") or ""),
+                            duration=_pick_duration(item),
+                            status=_pick_status(item),
+                            publish_time=str(item.get("publish_time") or item.get("publish_at") or item.get("created_at") or ""),
+                            play_count=int(item.get("read_amount") or 0),
+                            like_count=int(item.get("like_amount") or 0),
+                            comment_count=int(item.get("comment_amount") or 0),
+                            share_count=int(item.get("share_amount") or 0),
+                            collect_count=int(item.get("collection_amount") or 0),
+                        )
+                    )
+
+                total_page = int(page_info.get("totalPage", 0) or 0)
+                has_more = bool(total_page and page_num < total_page)
+
+                print(f"[{self.platform_name}] 第 {page_num}/{total_page or '?'} 页: 获取 {new_count} 个新作品, 累计 {len(all_works)}")
+
+                if not has_more or len(items) == 0 or new_count == 0:
+                    break
+
+                # Short pause between pages so requests are not fast enough to trigger risk control
+                await asyncio.sleep(1)
+
+            print(f"[{self.platform_name}] ✓ 自动分页完成,共获取 {len(all_works)} 个作品")
+
+        except Exception as e:
+            import traceback
+            traceback.print_exc()
+            # If some works were already collected, still report success with the partial list
+            if all_works:
+                print(f"[{self.platform_name}] 虽有异常但已获取 {len(all_works)} 个作品,正常返回")
+                return WorksResult(
+                    success=True,
+                    platform=self.platform_name,
+                    works=all_works,
+                    total=total or len(all_works),
+                    has_more=False,
+                    next_page="",
+                )
+            return WorksResult(
+                success=False,
+                platform=self.platform_name,
+                error=str(e),
+                debug_info="baijiahao_get_all_works_failed"
+            )
+
+        return WorksResult(
+            success=True,
+            platform=self.platform_name,
+            works=all_works,
+            total=total or len(all_works),
+            has_more=False,
+            next_page="",
+        )
+
     async def get_article_stats(
         self,
         cookies: str,

+ 9 - 9
server/python/platforms/xiaohongshu.py

@@ -1451,7 +1451,7 @@ class XiaohongshuPublisher(BasePublisher):
                     flush=True,
                 )
 
-            async def fetch_notes_page(p):
+            async def fetch_notes_page(p, ps=None):
                 # 再次检查签名函数(每次调用前都检查)
                 sign_available = await self.page.evaluate("""() => {
                     return typeof window !== 'undefined' && typeof window._webmsxyw === 'function';
@@ -1465,10 +1465,10 @@ class XiaohongshuPublisher(BasePublisher):
                     await asyncio.sleep(2)
 
                 return await self.page.evaluate(
-                    """async (pageNum) => {
+                    """async ({ pageNum, pageSize }) => {
                         try {
                             // 使用正确的 API 端点:/api/galaxy/v2/creator/note/user/posted
-                            const url = `/api/galaxy/v2/creator/note/user/posted?tab=0&page=${pageNum}`;
+                            const url = `/api/galaxy/v2/creator/note/user/posted?tab=0&page=${pageNum}&page_size=${pageSize}`;
                             const headers = {
                                 'Accept': 'application/json, text/plain, */*',
                                 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
@@ -1578,7 +1578,7 @@ class XiaohongshuPublisher(BasePublisher):
 
             resp = None
             for attempt in range(1, 4):
-                resp = await fetch_notes_page(page)
+                resp = await fetch_notes_page(page, page_size)
 
                 # 打印调试信息
                 if resp and isinstance(resp, dict) and resp.get("_debug"):
@@ -1694,7 +1694,7 @@ class XiaohongshuPublisher(BasePublisher):
                 if len(notes) == 0:
                     has_more = False
                 else:
-                    next_resp = await fetch_notes_page(page + 1)
+                    next_resp = await fetch_notes_page(page + 1, page_size)
                     next_data = (
                         (next_resp or {}).get("data", {})
                         if isinstance(next_resp, dict)
@@ -1813,7 +1813,7 @@ class XiaohongshuPublisher(BasePublisher):
                     flush=True,
                 )
 
-            async def fetch_notes_page(p):
+            async def fetch_notes_page(p, ps=None):
                 # 再次检查签名函数(每次调用前都检查)
                 sign_available = await self.page.evaluate("""() => {
                     return typeof window !== 'undefined' && typeof window._webmsxyw === 'function';
@@ -1827,10 +1827,10 @@ class XiaohongshuPublisher(BasePublisher):
                     await asyncio.sleep(2)
 
                 return await self.page.evaluate(
-                    """async (pageNum) => {
+                    """async ({ pageNum, pageSize }) => {
                         try {
                             // 使用正确的 API 端点:/api/galaxy/v2/creator/note/user/posted
-                            const url = `/api/galaxy/v2/creator/note/user/posted?tab=0&page=${pageNum}`;
+                            const url = `/api/galaxy/v2/creator/note/user/posted?tab=0&page=${pageNum}&page_size=${pageSize}`;
                             const headers = {
                                 'Accept': 'application/json, text/plain, */*',
                                 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
@@ -2210,7 +2210,7 @@ class XiaohongshuPublisher(BasePublisher):
                     f"\n[{self.platform_name}] ---------- 第 {iters} 次请求 (cursor={cursor}) ----------",
                     flush=True,
                 )
-                resp = await fetch_notes_page(cursor)
+                resp = await fetch_notes_page(cursor, api_page_size)
 
                 # 打印调试信息
                 if resp and isinstance(resp, dict) and resp.get("_debug"):

+ 7 - 1
server/src/routes/works.ts

@@ -38,7 +38,13 @@ router.get(
   '/stats',
   asyncHandler(async (req, res) => {
     const userId = req.user!.userId;
-    const stats = await workService.getStats(userId);
+    const params = {
+      accountId: req.query.accountId ? Number(req.query.accountId) : undefined,
+      platform: req.query.platform as string | undefined,
+      status: req.query.status as string | undefined,
+      keyword: req.query.keyword as string | undefined,
+    };
+    const stats = await workService.getStats(userId, params);
     res.json({ success: true, data: stats });
   })
 );

+ 5 - 3
server/src/services/HeadlessBrowserService.ts

@@ -749,7 +749,7 @@ class HeadlessBrowserService {
           cookie: cookieString,
           page: pageParam,
           page_size: pageSize,
-          auto_paging: platform === 'xiaohongshu' && pageIndex === 0,
+          auto_paging: (platform === 'xiaohongshu' || platform === 'baijiahao') && pageIndex === 0,
         }),
       });
 
@@ -4123,8 +4123,9 @@ class HeadlessBrowserService {
     const cookieString = JSON.stringify(cookies);
     const pythonUrl = (await getPythonServiceBaseUrl()).replace(/\/$/, '');
 
-    // 1. 先获取作品列表
-    logger.info(`[${platform} Comments Python] Fetching works list...`);
+    // 1. 先获取作品列表(小红书使用 auto_paging 获取全部,避免只取第一页)
+    const useAutoPaging = platform === 'xiaohongshu';
+    logger.info(`[${platform} Comments Python] Fetching works list (auto_paging=${useAutoPaging})...`);
     const worksResponse = await fetch(`${pythonUrl}/works`, {
       method: 'POST',
       headers: { 'Content-Type': 'application/json' },
@@ -4133,6 +4134,7 @@ class HeadlessBrowserService {
         cookie: cookieString,
         page: 0,
         page_size: 50,
+        auto_paging: useAutoPaging,
       }),
     });
 

+ 18 - 4
server/src/services/WorkService.ts

@@ -53,8 +53,8 @@ export class WorkService {
   /**
    * 获取作品统计
    */
-  async getStats(userId: number): Promise<WorkStats> {
-    const result = await this.workRepository
+  async getStats(userId: number, params?: WorksQueryParams): Promise<WorkStats> {
+    const queryBuilder = this.workRepository
       .createQueryBuilder('work')
       .select([
         'COUNT(*) as totalCount',
@@ -63,8 +63,22 @@ export class WorkService {
         'CAST(SUM(work.likeCount) AS SIGNED BIGINT) as totalLikeCount',
         'CAST(SUM(work.commentCount) AS SIGNED BIGINT) as totalCommentCount',
       ])
-      .where('work.userId = :userId', { userId })
-      .getRawOne();
+      .where('work.userId = :userId', { userId });
+
+    if (params?.accountId) {
+      queryBuilder.andWhere('work.accountId = :accountId', { accountId: params.accountId });
+    }
+    if (params?.platform) {
+      queryBuilder.andWhere('work.platform = :platform', { platform: params.platform });
+    }
+    if (params?.status) {
+      queryBuilder.andWhere('work.status = :status', { status: params.status });
+    }
+    if (params?.keyword) {
+      queryBuilder.andWhere('work.title LIKE :keyword', { keyword: `%${params.keyword}%` });
+    }
+
+    const result = await queryBuilder.getRawOne();
 
     return {
       totalCount: parseInt(result.totalCount) || 0,