|
|
@@ -1,6 +1,7 @@
|
|
|
/// <reference lib="dom" />
|
|
|
import { chromium, type BrowserContext, type Page } from 'playwright';
|
|
|
import { logger } from '../utils/logger.js';
|
|
|
+import { extractDeclaredNotesCountFromPostedResponse } from '../utils/xiaohongshu.js';
|
|
|
import type { PlatformType } from '@media-manager/shared';
|
|
|
|
|
|
// Python 服务配置
|
|
|
@@ -79,6 +80,9 @@ export interface AccountInfo {
|
|
|
fansCount?: number;
|
|
|
worksCount: number;
|
|
|
worksList?: WorkItem[];
|
|
|
+ worksListComplete?: boolean;
|
|
|
+ source?: 'python' | 'playwright' | 'api';
|
|
|
+ pythonAvailable?: boolean;
|
|
|
}
|
|
|
|
|
|
export interface WorkItem {
|
|
|
@@ -512,19 +516,40 @@ class HeadlessBrowserService {
|
|
|
/**
|
|
|
* 通过 Python API 获取作品列表
|
|
|
*/
|
|
|
- private async fetchWorksViaPython(platform: PlatformType, cookies: CookieData[]): Promise<WorkItem[]> {
|
|
|
+ private async fetchWorksViaPython(
|
|
|
+ platform: PlatformType,
|
|
|
+ cookies: CookieData[],
|
|
|
+ onPage?: (info: {
|
|
|
+ platform: PlatformType;
|
|
|
+ page: string;
|
|
|
+ pageSize: number;
|
|
|
+ fetched: number;
|
|
|
+ newCount: number;
|
|
|
+ totalSoFar: number;
|
|
|
+ declaredTotal?: number;
|
|
|
+ hasMore: boolean;
|
|
|
+ nextPage?: unknown;
|
|
|
+ }) => void
|
|
|
+ ): Promise<{
|
|
|
+ works: WorkItem[];
|
|
|
+ total?: number;
|
|
|
+ }> {
|
|
|
logger.info(`[Python API] Fetching works for ${platform} (auto pagination)...`);
|
|
|
|
|
|
const cookieString = JSON.stringify(cookies);
|
|
|
const pythonPlatform = platform === 'weixin_video' ? 'weixin' : platform;
|
|
|
|
|
|
- const pageSize = 50;
|
|
|
- const maxPages = 30;
|
|
|
+ const pageSize = platform === 'xiaohongshu' ? 20 : 50;
|
|
|
+ let maxPages = 30;
|
|
|
const allWorks: WorkItem[] = [];
|
|
|
const seenIds = new Set<string>();
|
|
|
+ let declaredTotal: number | undefined;
|
|
|
|
|
|
- for (let page = 0; page < maxPages; page++) {
|
|
|
- logger.info(`[Python API] Fetching works page=${page}, page_size=${pageSize} for ${platform}`);
|
|
|
+ let cursor: string | number = 0;
|
|
|
+ const seenCursors = new Set<string>();
|
|
|
+ for (let pageIndex = 0; pageIndex < maxPages; pageIndex++) {
|
|
|
+ const pageParam = platform === 'xiaohongshu' ? cursor : pageIndex;
|
|
|
+ logger.info(`[Python API] Fetching works page=${String(pageParam)}, page_size=${pageSize} for ${platform}`);
|
|
|
|
|
|
const response = await fetch(`${PYTHON_SERVICE_URL}/works`, {
|
|
|
method: 'POST',
|
|
|
@@ -534,8 +559,9 @@ class HeadlessBrowserService {
|
|
|
body: JSON.stringify({
|
|
|
platform: pythonPlatform,
|
|
|
cookie: cookieString,
|
|
|
- page,
|
|
|
+ page: pageParam,
|
|
|
page_size: pageSize,
|
|
|
+ auto_paging: platform === 'xiaohongshu' && pageIndex === 0,
|
|
|
}),
|
|
|
});
|
|
|
|
|
|
@@ -549,6 +575,13 @@ class HeadlessBrowserService {
|
|
|
throw new Error(result.error || 'Failed to get works');
|
|
|
}
|
|
|
|
|
|
+ if (typeof result.total === 'number' && result.total > 0) {
|
|
|
+ declaredTotal = declaredTotal ? Math.max(declaredTotal, result.total) : result.total;
|
|
|
+ if (pageIndex === 0) {
|
|
|
+ maxPages = Math.min(400, Math.ceil(result.total / pageSize) + 5);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
const pageWorks: WorkItem[] = (result.works || []).map((work: {
|
|
|
work_id: string;
|
|
|
title: string;
|
|
|
@@ -585,27 +618,69 @@ class HeadlessBrowserService {
|
|
|
}
|
|
|
|
|
|
logger.info(
|
|
|
- `[Python API] Page ${page} fetched=${pageWorks.length}, new=${newCount}, total=${allWorks.length}, has_more=${!!result.has_more}`
|
|
|
+ `[Python API] Page ${String(pageParam)} fetched=${pageWorks.length}, new=${newCount}, total=${allWorks.length}, has_more=${!!result.has_more}, declared_total=${declaredTotal || 0}, next_page=${String(result.next_page ?? '')}`
|
|
|
);
|
|
|
+ onPage?.({
|
|
|
+ platform,
|
|
|
+ page: String(pageParam),
|
|
|
+ pageSize,
|
|
|
+ fetched: pageWorks.length,
|
|
|
+ newCount,
|
|
|
+ totalSoFar: allWorks.length,
|
|
|
+ declaredTotal,
|
|
|
+ hasMore: !!result.has_more,
|
|
|
+ nextPage: result.next_page,
|
|
|
+ });
|
|
|
+
|
|
|
+ if (platform === 'xiaohongshu') {
|
|
|
+ const next = result.next_page;
|
|
|
+ const expectedMore = declaredTotal && declaredTotal > 0 ? allWorks.length < declaredTotal : !!result.has_more;
|
|
|
+
|
|
|
+ if (next !== undefined && next !== null && next !== '' && next !== -1 && next !== '-1') {
|
|
|
+ const key = String(next);
|
|
|
+ if (seenCursors.has(key)) break;
|
|
|
+ seenCursors.add(key);
|
|
|
+ cursor = next;
|
|
|
+ } else {
|
|
|
+ cursor = (typeof cursor === 'number' ? cursor + 1 : pageIndex + 1);
|
|
|
+ }
|
|
|
|
|
|
- if (!result.has_more || pageWorks.length === 0 || newCount === 0) {
|
|
|
- break;
|
|
|
+ if (!expectedMore || pageWorks.length === 0 || newCount === 0) break;
|
|
|
+ } else {
|
|
|
+ if (!result.has_more || pageWorks.length === 0 || newCount === 0) break;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
logger.info(`[Python API] Total works fetched for ${platform}: ${allWorks.length}`);
|
|
|
- return allWorks;
|
|
|
+ return { works: allWorks, total: declaredTotal };
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* 获取账号信息(优先使用 Python API,回退到无头浏览器)
|
|
|
*/
|
|
|
- async fetchAccountInfo(platform: PlatformType, cookies: CookieData[]): Promise<AccountInfo> {
|
|
|
+ async fetchAccountInfo(
|
|
|
+ platform: PlatformType,
|
|
|
+ cookies: CookieData[],
|
|
|
+ options?: {
|
|
|
+ onWorksFetchProgress?: (info: {
|
|
|
+ platform: PlatformType;
|
|
|
+ page: string;
|
|
|
+ pageSize: number;
|
|
|
+ fetched: number;
|
|
|
+ newCount: number;
|
|
|
+ totalSoFar: number;
|
|
|
+ declaredTotal?: number;
|
|
|
+ hasMore: boolean;
|
|
|
+ nextPage?: unknown;
|
|
|
+ }) => void;
|
|
|
+ }
|
|
|
+ ): Promise<AccountInfo> {
|
|
|
logger.info(`[fetchAccountInfo] Starting for platform: ${platform}`);
|
|
|
+ let pythonAvailable = false;
|
|
|
|
|
|
// 百家号:优先走 Python 的 /account_info(包含粉丝数、作品数),避免 Node 直连分散认证问题
|
|
|
if (platform === 'baijiahao') {
|
|
|
- const pythonAvailable = await this.checkPythonServiceAvailable();
|
|
|
+ pythonAvailable = await this.checkPythonServiceAvailable();
|
|
|
if (pythonAvailable) {
|
|
|
logger.info(`[Python API] Service available, fetching account_info for baijiahao`);
|
|
|
try {
|
|
|
@@ -618,7 +693,10 @@ class HeadlessBrowserService {
|
|
|
}
|
|
|
|
|
|
// Python 不可用或失败时,回退到 Node 直连 API(可能仍会遇到分散认证问题)
|
|
|
- return this.fetchBaijiahaoAccountInfoDirectApi(cookies);
|
|
|
+ const info = await this.fetchBaijiahaoAccountInfoDirectApi(cookies);
|
|
|
+ info.source = 'api';
|
|
|
+ info.pythonAvailable = pythonAvailable;
|
|
|
+ return info;
|
|
|
}
|
|
|
|
|
|
// 对于支持的平台,尝试使用 Python API 获取作品列表和账号信息
|
|
|
@@ -626,11 +704,15 @@ class HeadlessBrowserService {
|
|
|
const supportedPlatforms: PlatformType[] = ['douyin', 'xiaohongshu', 'kuaishou', 'weixin_video', 'baijiahao'];
|
|
|
|
|
|
if (supportedPlatforms.includes(platform)) {
|
|
|
- const pythonAvailable = await this.checkPythonServiceAvailable();
|
|
|
+ pythonAvailable = await this.checkPythonServiceAvailable();
|
|
|
if (pythonAvailable) {
|
|
|
logger.info(`[Python API] Service available, trying to fetch works for ${platform}`);
|
|
|
try {
|
|
|
- const worksList = await this.fetchWorksViaPython(platform, cookies);
|
|
|
+ const { works: worksList, total: worksTotal } = await this.fetchWorksViaPython(
|
|
|
+ platform,
|
|
|
+ cookies,
|
|
|
+ options?.onWorksFetchProgress
|
|
|
+ );
|
|
|
|
|
|
// 如果成功获取到作品,使用 Playwright 获取账号基本信息
|
|
|
if (worksList.length > 0) {
|
|
|
@@ -652,15 +734,21 @@ class HeadlessBrowserService {
|
|
|
}
|
|
|
|
|
|
accountInfo.worksList = worksList;
|
|
|
- // 直接使用 Python API 获取的作品数量(最准确,排除了已删除/私密视频)
|
|
|
- accountInfo.worksCount = worksList.length;
|
|
|
+ // 账号展示的作品数优先用 Python 返回的 total(更接近创作者中心“全部笔记”),否则回退到抓到的列表长度
|
|
|
+ accountInfo.worksCount = worksTotal && worksTotal > 0 ? worksTotal : worksList.length;
|
|
|
+ accountInfo.worksListComplete = worksTotal && worksTotal > 0 ? worksList.length >= worksTotal : undefined;
|
|
|
+ accountInfo.source = 'python';
|
|
|
+ accountInfo.pythonAvailable = true;
|
|
|
logger.info(`[fetchAccountInfo] Using Python API works count for ${platform}: ${accountInfo.worksCount}`);
|
|
|
return accountInfo;
|
|
|
} catch (playwrightError) {
|
|
|
logger.warn(`[Playwright] Failed to get account info for ${platform}:`, playwrightError);
|
|
|
const accountInfo = this.getDefaultAccountInfo(platform);
|
|
|
accountInfo.worksList = worksList;
|
|
|
- accountInfo.worksCount = worksList.length;
|
|
|
+ accountInfo.worksCount = worksTotal && worksTotal > 0 ? worksTotal : worksList.length;
|
|
|
+ accountInfo.worksListComplete = worksTotal && worksTotal > 0 ? worksList.length >= worksTotal : undefined;
|
|
|
+ accountInfo.source = 'python';
|
|
|
+ accountInfo.pythonAvailable = true;
|
|
|
return accountInfo;
|
|
|
}
|
|
|
}
|
|
|
@@ -677,7 +765,10 @@ class HeadlessBrowserService {
|
|
|
|
|
|
// 使用 Playwright 获取账号信息
|
|
|
logger.info(`[Playwright] Fetching account info for ${platform}`);
|
|
|
- return this.fetchAccountInfoWithPlaywright(platform, cookies);
|
|
|
+ const info = await this.fetchAccountInfoWithPlaywright(platform, cookies);
|
|
|
+ info.source = 'playwright';
|
|
|
+ info.pythonAvailable = pythonAvailable;
|
|
|
+ return info;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
@@ -1758,6 +1849,7 @@ class HeadlessBrowserService {
|
|
|
|
|
|
// 获取作品列表 - 通过监听 API 接口
|
|
|
const worksList: WorkItem[] = [];
|
|
|
+ let worksListComplete: boolean | undefined;
|
|
|
try {
|
|
|
logger.info('[Xiaohongshu] Navigating to note manager page to fetch works...');
|
|
|
|
|
|
@@ -1777,13 +1869,70 @@ class HeadlessBrowserService {
|
|
|
shareCount: number;
|
|
|
}> = [];
|
|
|
|
|
|
- let currentPage = 0;
|
|
|
- let hasMorePages = true;
|
|
|
- const maxPages = 20; // 最多获取20页,防止无限循环
|
|
|
+ let maxPages = 120;
|
|
|
|
|
|
// 设置 API 响应监听器 - 在导航之前绑定
|
|
|
let apiResponseReceived = false;
|
|
|
let totalNotesCount = 0; // 从 tags 中获取的总作品数
|
|
|
+ let stoppedByMaxPages = false;
|
|
|
+ const seenNoteIds = new Set<string>();
|
|
|
+
|
|
|
+ const upsertNotesFromPayload = (payload: any) => {
|
|
|
+ if (!payload) return;
|
|
|
+ const declaredTotal = extractDeclaredNotesCountFromPostedResponse(payload);
|
|
|
+ if (declaredTotal > 0) {
|
|
|
+ totalNotesCount = Math.max(totalNotesCount, declaredTotal);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (totalNotesCount > 0) {
|
|
|
+ const estimatedPages = Math.ceil(totalNotesCount / 20) + 5;
|
|
|
+ maxPages = Math.max(maxPages, Math.min(500, estimatedPages));
|
|
|
+ }
|
|
|
+
|
|
|
+ const notes = payload.notes || [];
|
|
|
+ for (const note of notes) {
|
|
|
+ const noteId = note.id || '';
|
|
|
+ if (!noteId || seenNoteIds.has(noteId)) continue;
|
|
|
+ seenNoteIds.add(noteId);
|
|
|
+
|
|
|
+ let coverUrl = note.images_list?.[0]?.url || '';
|
|
|
+ if (coverUrl.startsWith('http://')) {
|
|
|
+ coverUrl = coverUrl.replace('http://', 'https://');
|
|
|
+ }
|
|
|
+ const duration = note.video_info?.duration || 0;
|
|
|
+
|
|
|
+ allNotesData.push({
|
|
|
+ noteId,
|
|
|
+ title: note.display_title || '',
|
|
|
+ coverUrl,
|
|
|
+ status: note.tab_status || 1,
|
|
|
+ publishTime: note.time || '',
|
|
|
+ type: note.type || 'normal',
|
|
|
+ duration,
|
|
|
+ likeCount: note.likes || 0,
|
|
|
+ commentCount: note.comments_count || 0,
|
|
|
+ collectCount: note.collected_count || 0,
|
|
|
+ viewCount: note.view_count || 0,
|
|
|
+ shareCount: note.shared_count || 0,
|
|
|
+ });
|
|
|
+ }
|
|
|
+ };
|
|
|
+
|
|
|
+ const fetchNotesPage = async (pageNum: number) => {
|
|
|
+ return await page.evaluate(async (p) => {
|
|
|
+ const response = await fetch(
|
|
|
+ `https://edith.xiaohongshu.com/web_api/sns/v5/creator/note/user/posted?tab=0&page=${p}`,
|
|
|
+ {
|
|
|
+ method: 'GET',
|
|
|
+ credentials: 'include',
|
|
|
+ headers: {
|
|
|
+ Accept: 'application/json',
|
|
|
+ },
|
|
|
+ }
|
|
|
+ );
|
|
|
+ return await response.json();
|
|
|
+ }, pageNum);
|
|
|
+ };
|
|
|
|
|
|
const notesApiHandler = async (response: import('playwright').Response) => {
|
|
|
const url = response.url();
|
|
|
@@ -1798,56 +1947,8 @@ class HeadlessBrowserService {
|
|
|
|
|
|
if ((data?.success || data?.code === 0) && data?.data) {
|
|
|
apiResponseReceived = true;
|
|
|
-
|
|
|
- // 从 tags 中获取总作品数
|
|
|
- // tags 数组中 id="special.note_time_desc" 的项("所有笔记")包含总数
|
|
|
- if (data.data.tags && Array.isArray(data.data.tags)) {
|
|
|
- const allNotesTag = data.data.tags.find((tag: { id?: string; notes_count?: number }) =>
|
|
|
- tag.id === 'special.note_time_desc'
|
|
|
- );
|
|
|
- if (allNotesTag?.notes_count !== undefined) {
|
|
|
- totalNotesCount = allNotesTag.notes_count;
|
|
|
- logger.info(`[Xiaohongshu API] Total notes count from tags: ${totalNotesCount}`);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- const notes = data.data.notes || [];
|
|
|
- for (const note of notes) {
|
|
|
- // 根据 API 返回格式解析数据
|
|
|
- // images_list 是数组,第一个元素包含 url
|
|
|
- // 将 http:// 转换为 https:// 以确保图片能正常加载
|
|
|
- let coverUrl = note.images_list?.[0]?.url || '';
|
|
|
- if (coverUrl.startsWith('http://')) {
|
|
|
- coverUrl = coverUrl.replace('http://', 'https://');
|
|
|
- }
|
|
|
- const duration = note.video_info?.duration || 0;
|
|
|
-
|
|
|
- logger.info(`[Xiaohongshu API] Note: id=${note.id}, title="${note.display_title}", cover=${coverUrl ? coverUrl.slice(0, 60) + '...' : 'none'}`);
|
|
|
-
|
|
|
- allNotesData.push({
|
|
|
- noteId: note.id || '',
|
|
|
- title: note.display_title || '',
|
|
|
- coverUrl: coverUrl,
|
|
|
- status: note.tab_status || 1, // 1=已发布
|
|
|
- publishTime: note.time || '',
|
|
|
- type: note.type || 'normal', // video/normal
|
|
|
- duration: duration,
|
|
|
- likeCount: note.likes || 0,
|
|
|
- commentCount: note.comments_count || 0,
|
|
|
- collectCount: note.collected_count || 0,
|
|
|
- viewCount: note.view_count || 0,
|
|
|
- shareCount: note.shared_count || 0,
|
|
|
- });
|
|
|
- }
|
|
|
-
|
|
|
- // 检查是否还有更多页面
|
|
|
- // page=-1 表示没有更多数据
|
|
|
- if (data.data.page === -1 || notes.length === 0) {
|
|
|
- hasMorePages = false;
|
|
|
- logger.info(`[Xiaohongshu API] No more pages (page indicator: ${data.data.page})`);
|
|
|
- }
|
|
|
+ upsertNotesFromPayload(data.data);
|
|
|
} else {
|
|
|
- hasMorePages = false;
|
|
|
}
|
|
|
}
|
|
|
} catch (e) {
|
|
|
@@ -1882,134 +1983,37 @@ class HeadlessBrowserService {
|
|
|
logger.info('[Xiaohongshu] No notes captured via listener, trying direct API call...');
|
|
|
|
|
|
try {
|
|
|
- // 直接在页面上下文中调用 API
|
|
|
- const apiResponse = await page.evaluate(async () => {
|
|
|
- const response = await fetch('https://edith.xiaohongshu.com/web_api/sns/v5/creator/note/user/posted?tab=0&page=0', {
|
|
|
- method: 'GET',
|
|
|
- credentials: 'include',
|
|
|
- headers: {
|
|
|
- 'Accept': 'application/json',
|
|
|
- },
|
|
|
- });
|
|
|
- return await response.json();
|
|
|
- });
|
|
|
-
|
|
|
+ const apiResponse = await fetchNotesPage(0);
|
|
|
logger.info(`[Xiaohongshu] Direct API call result: success=${apiResponse?.success}, code=${apiResponse?.code}`);
|
|
|
-
|
|
|
if ((apiResponse?.success || apiResponse?.code === 0) && apiResponse?.data) {
|
|
|
- // 从 tags 中获取总作品数
|
|
|
- if (apiResponse.data.tags && Array.isArray(apiResponse.data.tags)) {
|
|
|
- const allNotesTag = apiResponse.data.tags.find((tag: { id?: string; notes_count?: number }) =>
|
|
|
- tag.id === 'special.note_time_desc'
|
|
|
- );
|
|
|
- if (allNotesTag?.notes_count !== undefined) {
|
|
|
- totalNotesCount = allNotesTag.notes_count;
|
|
|
- logger.info(`[Xiaohongshu API Direct] Total notes count from tags: ${totalNotesCount}`);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- const notes = apiResponse.data.notes || [];
|
|
|
- for (const note of notes) {
|
|
|
- // 将 http:// 转换为 https://
|
|
|
- let coverUrl = note.images_list?.[0]?.url || '';
|
|
|
- if (coverUrl.startsWith('http://')) {
|
|
|
- coverUrl = coverUrl.replace('http://', 'https://');
|
|
|
- }
|
|
|
- const duration = note.video_info?.duration || 0;
|
|
|
-
|
|
|
- logger.info(`[Xiaohongshu API Direct] Note: id=${note.id}, cover=${coverUrl ? coverUrl.slice(0, 60) + '...' : 'none'}`);
|
|
|
-
|
|
|
- allNotesData.push({
|
|
|
- noteId: note.id || '',
|
|
|
- title: note.display_title || '',
|
|
|
- coverUrl: coverUrl,
|
|
|
- status: note.tab_status || 1,
|
|
|
- publishTime: note.time || '',
|
|
|
- type: note.type || 'normal',
|
|
|
- duration: duration,
|
|
|
- likeCount: note.likes || 0,
|
|
|
- commentCount: note.comments_count || 0,
|
|
|
- collectCount: note.collected_count || 0,
|
|
|
- viewCount: note.view_count || 0,
|
|
|
- shareCount: note.shared_count || 0,
|
|
|
- });
|
|
|
- }
|
|
|
-
|
|
|
- // 获取更多页面
|
|
|
- let pageNum = 1;
|
|
|
- let lastPage = apiResponse.data.page;
|
|
|
- while (lastPage !== -1 && pageNum < maxPages) {
|
|
|
- const nextResponse = await page.evaluate(async (p) => {
|
|
|
- const response = await fetch(`https://edith.xiaohongshu.com/web_api/sns/v5/creator/note/user/posted?tab=0&page=${p}`, {
|
|
|
- method: 'GET',
|
|
|
- credentials: 'include',
|
|
|
- });
|
|
|
- return await response.json();
|
|
|
- }, pageNum);
|
|
|
-
|
|
|
- if (nextResponse?.data?.notes?.length > 0) {
|
|
|
- for (const note of nextResponse.data.notes) {
|
|
|
- // 将 http:// 转换为 https://
|
|
|
- let coverUrl = note.images_list?.[0]?.url || '';
|
|
|
- if (coverUrl.startsWith('http://')) {
|
|
|
- coverUrl = coverUrl.replace('http://', 'https://');
|
|
|
- }
|
|
|
- const duration = note.video_info?.duration || 0;
|
|
|
-
|
|
|
- allNotesData.push({
|
|
|
- noteId: note.id || '',
|
|
|
- title: note.display_title || '',
|
|
|
- coverUrl: coverUrl,
|
|
|
- status: note.tab_status || 1,
|
|
|
- publishTime: note.time || '',
|
|
|
- type: note.type || 'normal',
|
|
|
- duration: duration,
|
|
|
- likeCount: note.likes || 0,
|
|
|
- commentCount: note.comments_count || 0,
|
|
|
- collectCount: note.collected_count || 0,
|
|
|
- viewCount: note.view_count || 0,
|
|
|
- shareCount: note.shared_count || 0,
|
|
|
- });
|
|
|
- }
|
|
|
- pageNum++;
|
|
|
- lastPage = nextResponse.data.page;
|
|
|
- if (lastPage === -1) break;
|
|
|
- } else {
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
+ upsertNotesFromPayload(apiResponse.data);
|
|
|
}
|
|
|
} catch (apiError) {
|
|
|
logger.warn('[Xiaohongshu] Direct API call failed:', apiError);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // 如果还是没有数据,尝试滚动加载
|
|
|
- if (allNotesData.length === 0) {
|
|
|
- logger.info('[Xiaohongshu] Still no notes, trying scroll to trigger API...');
|
|
|
- await page.waitForTimeout(2000);
|
|
|
- }
|
|
|
-
|
|
|
- // 滚动加载更多页面(如果通过监听器获取的数据)
|
|
|
- while (hasMorePages && currentPage < maxPages && allNotesData.length > 0 && apiResponseReceived) {
|
|
|
- currentPage++;
|
|
|
- const previousCount = allNotesData.length;
|
|
|
-
|
|
|
- // 滚动到页面底部触发加载更多
|
|
|
- await page.evaluate(() => {
|
|
|
- window.scrollTo(0, document.body.scrollHeight);
|
|
|
- });
|
|
|
-
|
|
|
- // 等待新数据加载
|
|
|
- await page.waitForTimeout(2000);
|
|
|
+ if (allNotesData.length > 0) {
|
|
|
+ let pageNum = 1;
|
|
|
+ while (pageNum < maxPages) {
|
|
|
+ if (totalNotesCount > 0 && seenNoteIds.size >= totalNotesCount) break;
|
|
|
+ let nextResponse: any;
|
|
|
+ try {
|
|
|
+ nextResponse = await fetchNotesPage(pageNum);
|
|
|
+ } catch (e) {
|
|
|
+ logger.warn(`[Xiaohongshu] Page fetch failed: page=${pageNum}`, e);
|
|
|
+ break;
|
|
|
+ }
|
|
|
|
|
|
- // 如果没有新数据,退出循环
|
|
|
- if (allNotesData.length === previousCount) {
|
|
|
- logger.info(`[Xiaohongshu] No new notes loaded after scroll, stopping at page ${currentPage}`);
|
|
|
- break;
|
|
|
+ if (!(nextResponse?.success || nextResponse?.code === 0) || !nextResponse?.data) break;
|
|
|
+ const before = seenNoteIds.size;
|
|
|
+ upsertNotesFromPayload(nextResponse.data);
|
|
|
+ const after = seenNoteIds.size;
|
|
|
+ if (after === before) break;
|
|
|
+ pageNum++;
|
|
|
+ await page.waitForTimeout(600);
|
|
|
}
|
|
|
-
|
|
|
- logger.info(`[Xiaohongshu] Page ${currentPage}: total ${allNotesData.length} notes`);
|
|
|
+ if (pageNum >= maxPages) stoppedByMaxPages = true;
|
|
|
}
|
|
|
|
|
|
// 移除监听器
|
|
|
@@ -2048,21 +2052,19 @@ class HeadlessBrowserService {
|
|
|
|
|
|
logger.info(`[Xiaohongshu] Fetched ${worksList.length} works via API`);
|
|
|
|
|
|
- // 更新作品数:直接使用获取到的 notes 数量(更准确)
|
|
|
- // 只有当 notes 为空时才使用 tags 中的 notes_count
|
|
|
- if (worksList.length > 0) {
|
|
|
- worksCount = worksList.length;
|
|
|
- logger.info(`[Xiaohongshu] Using actual notes count: ${worksCount}`);
|
|
|
- } else if (totalNotesCount > 0) {
|
|
|
+ if (totalNotesCount > 0) {
|
|
|
+ worksListComplete = worksList.length >= totalNotesCount;
|
|
|
worksCount = totalNotesCount;
|
|
|
- logger.info(`[Xiaohongshu] Using notes count from tags: ${worksCount}`);
|
|
|
+ } else if (worksList.length > 0) {
|
|
|
+ worksListComplete = !stoppedByMaxPages;
|
|
|
+ worksCount = worksList.length;
|
|
|
}
|
|
|
} catch (worksError) {
|
|
|
logger.warn('[Xiaohongshu] Failed to fetch works list:', worksError);
|
|
|
}
|
|
|
|
|
|
logger.info(`[Xiaohongshu] Final account info: id=${accountId}, name=${accountName}, fans=${fansCount}, works=${worksCount}`);
|
|
|
- return { accountId, accountName, avatarUrl, fansCount, worksCount, worksList };
|
|
|
+ return { accountId, accountName, avatarUrl, fansCount, worksCount, worksList, worksListComplete };
|
|
|
} catch (error) {
|
|
|
logger.warn('[Xiaohongshu] Failed to fetch account info:', error);
|
|
|
}
|