|
@@ -732,10 +732,10 @@ class HeadlessBrowserService {
|
|
|
// 抖音、小红书使用 cursor 分页;视频号使用 currentPage 页码(pageIndex 0,1,2...)
|
|
// 抖音、小红书使用 cursor 分页;视频号使用 currentPage 页码(pageIndex 0,1,2...)
|
|
|
const useCursorPagination = platform === 'xiaohongshu' || platform === 'douyin';
|
|
const useCursorPagination = platform === 'xiaohongshu' || platform === 'douyin';
|
|
|
for (let pageIndex = 0; pageIndex < maxPages; pageIndex++) {
|
|
for (let pageIndex = 0; pageIndex < maxPages; pageIndex++) {
|
|
|
- const pageParam = useCursorPagination ? cursor : pageIndex;
|
|
|
|
|
|
|
+ const pageParam: number | string = useCursorPagination ? cursor : pageIndex;
|
|
|
logger.info(`[Python API] Fetching works page=${String(pageParam)}, page_size=${pageSize} for ${platform}`);
|
|
logger.info(`[Python API] Fetching works page=${String(pageParam)}, page_size=${pageSize} for ${platform}`);
|
|
|
|
|
|
|
|
- const response = await fetch(`${PYTHON_SERVICE_URL}/works`, {
|
|
|
|
|
|
|
+ const response: Response = await fetch(`${PYTHON_SERVICE_URL}/works`, {
|
|
|
method: 'POST',
|
|
method: 'POST',
|
|
|
headers: {
|
|
headers: {
|
|
|
'Content-Type': 'application/json',
|
|
'Content-Type': 'application/json',
|
|
@@ -753,7 +753,7 @@ class HeadlessBrowserService {
|
|
|
throw new Error(`Python API returned ${response.status}`);
|
|
throw new Error(`Python API returned ${response.status}`);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- const result = await response.json();
|
|
|
|
|
|
|
+ const result: any = await response.json();
|
|
|
|
|
|
|
|
// 记录 Python API 的详细响应(用于调试)
|
|
// 记录 Python API 的详细响应(用于调试)
|
|
|
if (pageIndex === 0) {
|
|
if (pageIndex === 0) {
|
|
@@ -847,7 +847,7 @@ class HeadlessBrowserService {
|
|
|
});
|
|
});
|
|
|
|
|
|
|
|
if (useCursorPagination) {
|
|
if (useCursorPagination) {
|
|
|
- const next = result.next_page;
|
|
|
|
|
|
|
+ const next: any = result.next_page;
|
|
|
const hasNextCursor = next !== undefined && next !== null && next !== '' && next !== -1 && next !== '-1';
|
|
const hasNextCursor = next !== undefined && next !== null && next !== '' && next !== -1 && next !== '-1';
|
|
|
|
|
|
|
|
if (hasNextCursor) {
|
|
if (hasNextCursor) {
|
|
@@ -3933,7 +3933,7 @@ class HeadlessBrowserService {
|
|
|
/**
|
|
/**
|
|
|
* 通过 Python API 获取评论 - 分作品逐个获取
|
|
* 通过 Python API 获取评论 - 分作品逐个获取
|
|
|
*/
|
|
*/
|
|
|
- private async fetchCommentsViaPythonApi(platform: 'douyin' | 'xiaohongshu', cookies: CookieData[]): Promise<WorkComments[]> {
|
|
|
|
|
|
|
+ private async fetchCommentsViaPythonApi(platform: 'douyin' | 'xiaohongshu' | 'weixin', cookies: CookieData[]): Promise<WorkComments[]> {
|
|
|
const allWorkComments: WorkComments[] = [];
|
|
const allWorkComments: WorkComments[] = [];
|
|
|
const cookieString = JSON.stringify(cookies);
|
|
const cookieString = JSON.stringify(cookies);
|
|
|
|
|
|
|
@@ -4895,6 +4895,232 @@ class HeadlessBrowserService {
|
|
|
return allWorkComments;
|
|
return allWorkComments;
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+  /**
+   * Fetch comments for WeChat Channels (视频号) works.
+   *
+   * Strategy, in order:
+   *  1. If the Python service reports itself available, ask it for comments
+   *     and return them when the result is non-empty.
+   *  2. Otherwise (service unavailable, empty result, or the call threw) fall
+   *     back to a headless Chromium session: open the comment-management page
+   *     and collect comments from the intercepted `post_list` / `comment_list`
+   *     API responses.
+   *  3. If no intercepted response yielded comments, scrape the rendered DOM
+   *     and return the result under the synthetic work id `page_comments`.
+   *
+   * Errors raised after the browser has been launched are logged and the
+   * comments collected so far (possibly none) are returned. The browser is
+   * always closed before this method returns.
+   *
+   * @param cookies Login cookies for the account; entries without a domain
+   *   default to `.weixin.qq.com`, entries without a path default to `/`.
+   * @returns One `WorkComments` entry per work for which comments were captured.
+   */
+  async fetchWeixinVideoCommentsViaApi(cookies: CookieData[]): Promise<WorkComments[]> {
+    // Preferred path: let the Python service fetch the comments.
+    const pythonAvailable = await this.checkPythonServiceAvailable();
+    if (pythonAvailable) {
+      logger.info('[Weixin Video Comments] Using Python API...');
+      try {
+        const result = await this.fetchCommentsViaPythonApi('weixin', cookies);
+        if (result.length > 0) {
+          return result;
+        }
+        logger.info('[Weixin Video Comments] Python API returned empty, falling back to Playwright...');
+      } catch (pythonError) {
+        // A failing Python service is not fatal; continue with the browser fallback.
+        logger.warn('[Weixin Video Comments] Python API failed:', pythonError);
+      }
+    }
+
+    // Fallback path: drive a headless browser.
+    const browser = await chromium.launch({
+      headless: true,
+      args: ['--no-sandbox', '--disable-setuid-sandbox'],
+    });
+
+    const allWorkComments: WorkComments[] = [];
+
+    try {
+      const context = await browser.newContext({
+        viewport: { width: 1920, height: 1080 },
+        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+      });
+
+      // Install the login cookies.
+      const playwrightCookies = cookies.map(c => ({
+        name: c.name,
+        value: c.value,
+        domain: c.domain || '.weixin.qq.com',
+        path: c.path || '/',
+      }));
+      await context.addCookies(playwrightCookies);
+      logger.info(`[Weixin Video Comments] Set ${playwrightCookies.length} cookies`);
+
+      const page = await context.newPage();
+
+      // Data captured from intercepted API responses.
+      const capturedComments: Map<string, CommentItem[]> = new Map();
+      const capturedWorks: Array<{
+        workId: string;
+        title: string;
+        coverUrl: string;
+      }> = [];
+
+      // Date.now() alone repeats for every comment handled within the same
+      // millisecond, so a per-call sequence number keeps synthesized ids unique.
+      let fallbackIdSeq = 0;
+      const nextFallbackId = (prefix: string): string => `${prefix}_${Date.now()}_${fallbackIdSeq++}`;
+
+      // Normalize one raw comment (top-level or reply) from the API payload.
+      // Both camelCase and snake_case field spellings are accepted.
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped third-party JSON payload
+      const toCommentItem = (raw: any, idPrefix: string, parentCommentId: string | undefined): CommentItem => ({
+        commentId: raw.commentId || raw.id || nextFallbackId(idPrefix),
+        authorId: raw.commenterInfo?.identifier || raw.authorId || '',
+        authorName: raw.commenterInfo?.nickName || raw.nickname || raw.nick_name || '',
+        authorAvatar: raw.commenterInfo?.headUrl || raw.avatar || '',
+        content: raw.content || '',
+        likeCount: raw.likeCnt || raw.like_count || 0,
+        commentTime: raw.createTime || raw.create_time || '',
+        parentCommentId,
+      });
+
+      // Intercept API responses issued by the page.
+      page.on('response', async (response) => {
+        const url = response.url();
+        try {
+          // Works list API.
+          if (url.includes('/mmfinderassistant-bin/post/post_list')) {
+            const data = await response.json();
+            logger.info(`[Weixin Video API] Works list: ${JSON.stringify(data).slice(0, 500)}`);
+            const posts = data?.data?.list || [];
+            for (const post of posts) {
+              capturedWorks.push({
+                workId: post.objectNonce || post.id || '',
+                title: post.title || post.desc || '',
+                coverUrl: post.cover?.url || post.cover || '',
+              });
+            }
+          }
+
+          // Comment list API.
+          if (url.includes('/mmfinderassistant-bin/comment/comment_list')) {
+            const data = await response.json();
+            logger.info(`[Weixin Video API] Comments: ${JSON.stringify(data).slice(0, 500)}`);
+
+            const comments: CommentItem[] = [];
+            const commentList = data?.data?.commentList || data?.comments || [];
+
+            for (const comment of commentList) {
+              const parent = toCommentItem(comment, 'weixin', comment.parentCommentId || undefined);
+              comments.push(parent);
+
+              // Replies point at the id actually assigned to the parent, so the
+              // thread stays linked even when the parent id had to be synthesized.
+              const subComments = comment.subCommentList || comment.sub_comments || [];
+              for (const sub of subComments) {
+                comments.push(toCommentItem(sub, 'weixin_sub', parent.commentId));
+              }
+            }
+
+            // Try to read the work id from the request URL.
+            const workIdMatch = url.match(/objectNonce=([^&]+)/) || url.match(/workId=([^&]+)/);
+            const workId = workIdMatch?.[1] || `work_${Date.now()}`;
+
+            if (comments.length > 0) {
+              const existing = capturedComments.get(workId) || [];
+              capturedComments.set(workId, [...existing, ...comments]);
+            }
+          }
+        } catch (listenerError) {
+          // Best effort: an unreadable or non-JSON body must not abort the crawl,
+          // but it should be visible in the logs. The query string is omitted
+          // because it may carry session parameters.
+          logger.warn(`[Weixin Video API] Failed to process response ${url.split('?')[0]}:`, listenerError);
+        }
+      });
+
+      // Open the comment-management page.
+      logger.info('[Weixin Video Comments] Navigating to comment management...');
+      await page.goto('https://channels.weixin.qq.com/platform/interaction/comment', {
+        waitUntil: 'domcontentloaded',
+        timeout: 60000,
+      });
+
+      await page.waitForTimeout(5000);
+
+      // A redirect to a login/passport URL means the cookies are no longer valid.
+      const currentUrl = page.url();
+      if (currentUrl.includes('login') || currentUrl.includes('passport')) {
+        logger.warn('[Weixin Video Comments] Cookie expired, need re-login');
+        return allWorkComments;
+      }
+
+      // Scroll a few times to trigger loading of more comments.
+      for (let i = 0; i < 5; i++) {
+        await page.evaluate(() => {
+          window.scrollBy(0, 500);
+        });
+        await page.waitForTimeout(1000);
+      }
+
+      // Give in-flight API responses time to arrive.
+      await page.waitForTimeout(3000);
+
+      // Convert the captured comments into WorkComments entries.
+      for (const [workId, comments] of capturedComments) {
+        const workInfo = capturedWorks.find(w => w.workId === workId);
+        allWorkComments.push({
+          videoId: workId,
+          videoTitle: workInfo?.title || `作品 ${workId.slice(0, 10)}`,
+          videoCoverUrl: workInfo?.coverUrl || '',
+          comments,
+        });
+      }
+
+      // Nothing came from the API: fall back to scraping the rendered page.
+      if (allWorkComments.length === 0) {
+        logger.info('[Weixin Video Comments] No comments from API, extracting from page...');
+
+        const pageComments = await page.evaluate(() => {
+          const result: Array<{
+            commentId: string;
+            authorName: string;
+            authorAvatar: string;
+            content: string;
+            likeCount: number;
+            commentTime: string;
+          }> = [];
+
+          const commentItems = document.querySelectorAll('[class*="comment-item"], [class*="comment-card"]');
+          commentItems.forEach((item, index) => {
+            try {
+              const authorEl = item.querySelector('[class*="author"], [class*="name"]');
+              const avatarEl = item.querySelector('img');
+              const contentEl = item.querySelector('[class*="content"]');
+              const timeEl = item.querySelector('[class*="time"]');
+              const likeEl = item.querySelector('[class*="like"] span');
+
+              result.push({
+                commentId: `weixin_page_${index}`,
+                authorName: authorEl?.textContent?.trim() || '',
+                authorAvatar: avatarEl?.src || '',
+                content: contentEl?.textContent?.trim() || '',
+                likeCount: parseInt(likeEl?.textContent || '0', 10) || 0,
+                commentTime: timeEl?.textContent?.trim() || '',
+              });
+            } catch {
+              // Skip a malformed node; this callback runs in the page, where the
+              // service logger is not available.
+            }
+          });
+
+          return result;
+        });
+
+        if (pageComments.length > 0) {
+          allWorkComments.push({
+            videoId: 'page_comments',
+            videoTitle: '页面评论',
+            videoCoverUrl: '',
+            comments: pageComments.map(c => ({
+              ...c,
+              authorId: '',
+            })),
+          });
+        }
+      }
+
+      const totalComments = allWorkComments.reduce((sum, w) => sum + w.comments.length, 0);
+      logger.info(`[Weixin Video Comments] Total: fetched ${totalComments} comments from ${allWorkComments.length} works`);
+
+      return allWorkComments;
+    } catch (error) {
+      logger.error('[Weixin Video Comments] Error:', error);
+      return allWorkComments;
+    } finally {
+      // Single shutdown point for every exit path (success, expired cookies,
+      // error). Closing the browser also closes its contexts and pages.
+      try {
+        await browser.close();
+      } catch (closeError) {
+        logger.warn('[Weixin Video Comments] Failed to close browser:', closeError);
+      }
+    }
+  }
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
export const headlessBrowserService = new HeadlessBrowserService();
|
|
export const headlessBrowserService = new HeadlessBrowserService();
|