|
|
@@ -3,13 +3,25 @@ import { chromium, type BrowserContext, type Page } from 'playwright';
|
|
|
import { logger } from '../utils/logger.js';
|
|
|
import type { PlatformType } from '@media-manager/shared';
|
|
|
|
|
|
-// 平台 API 配置
|
|
|
/**
 * Douyin creator-platform endpoints used by this service.
 *
 * `CHECK_USER` is a path fragment that is matched against response URLs with
 * `url.includes(...)`; the remaining entries are absolute URLs.
 */
const DOUYIN_API = {
  /** Login-status probe; a response with `result: true` means the session is logged in. */
  CHECK_USER: '/aweme/v1/creator/check/user/',
  /** Work (video) list endpoint; the newer interface, which supports paging. */
  WORK_LIST: 'https://creator.douyin.com/janus/douyin/creator/pc/work_list',
  /** Comment list endpoint. */
  COMMENT_LIST: 'https://creator.douyin.com/web/api/third_party/aweme/api/comment/read/aweme/v1/web/comment/list/select/',
  /** Creator home page; navigating here is used to trigger the login check. */
  CREATOR_HOME: 'https://creator.douyin.com/creator-micro/home',
} as const;
|
|
|
+
|
|
|
+// 平台 API 配置(用于直接 HTTP 请求检查)
|
|
|
const PLATFORM_API_CONFIG: Record<string, {
|
|
|
checkUrl: string;
|
|
|
isValidResponse: (data: unknown) => boolean;
|
|
|
}> = {
|
|
|
douyin: {
|
|
|
- // 抖音检查 Cookie 有效性的 API
|
|
|
+ // 使用账号基础信息接口检查 Cookie 有效性
|
|
|
checkUrl: 'https://creator.douyin.com/web/api/creator/mcn/account_base_info?show_mcn_status=1',
|
|
|
isValidResponse: (data: unknown) => {
|
|
|
const resp = data as { status_code?: number; BaseResp?: { StatusCode?: number } };
|
|
|
@@ -41,6 +53,27 @@ export interface WorkItem {
|
|
|
shareCount: number;
|
|
|
}
|
|
|
|
|
|
/**
 * One comment on a work, in the shape shared by the page-scraping and
 * API-interception code paths.
 */
export interface CommentItem {
  /** Identifier of the comment. */
  commentId: string;
  /** Identifier of the comment author. */
  authorId: string;
  /** Display name of the comment author. */
  authorName: string;
  /** URL of the author's avatar image; empty string when unknown. */
  authorAvatar: string;
  /** Comment text. */
  content: string;
  /** Number of likes on the comment. */
  likeCount: number;
  /** Time of the comment as a string (format depends on the producer). */
  commentTime: string;
  /** Identifier of the parent comment when this is a reply. */
  parentCommentId?: string;
  /** Identifier of the work the comment belongs to. */
  videoId?: string;
  /** Title of the work the comment belongs to. */
  videoTitle?: string;
  /** Cover image URL of the work the comment belongs to. */
  videoCoverUrl?: string;
}
|
|
|
+
|
|
|
/** The comments collected for a single work, together with that work's display data. */
export interface WorkComments {
  /** Identifier of the work. */
  videoId: string;
  /** Title of the work. */
  videoTitle: string;
  /** Cover image URL of the work; empty string when unknown. */
  videoCoverUrl: string;
  /** Comments collected for the work. */
  comments: CommentItem[];
}
|
|
|
+
|
|
|
export interface CookieData {
|
|
|
name: string;
|
|
|
value: string;
|
|
|
@@ -132,6 +165,11 @@ class HeadlessBrowserService {
|
|
|
* 通过浏览器检查 Cookie 是否有效(检查是否被重定向到登录页)
|
|
|
*/
|
|
|
private async checkCookieValidByBrowser(platform: PlatformType, cookies: CookieData[]): Promise<boolean> {
|
|
|
+ // 对于抖音平台,使用 check/user 接口检查
|
|
|
+ if (platform === 'douyin') {
|
|
|
+ return this.checkDouyinLoginByApi(cookies);
|
|
|
+ }
|
|
|
+
|
|
|
const browser = await chromium.launch({ headless: true });
|
|
|
|
|
|
try {
|
|
|
@@ -174,6 +212,75 @@ class HeadlessBrowserService {
|
|
|
}
|
|
|
|
|
|
/**
 * Checks whether a set of Douyin cookies represents a logged-in session.
 *
 * Launches headless Chromium, opens the creator home page with the cookies
 * installed and listens for the `check/user` response. The session counts as
 * logged in when that response carries `result === true` and
 * `status_code === 0`. If no such response has been parsed within 10 seconds
 * of the navigation completing, the method falls back to checking that the
 * current page URL contains neither "login" nor "passport".
 *
 * @param cookies Cookies added to the browser context before navigating.
 * @returns `true` when the session is considered logged in; `false`
 *          otherwise, including when any step throws (the error is logged,
 *          not rethrown).
 */
async checkDouyinLoginByApi(cookies: CookieData[]): Promise<boolean> {
  const browser = await chromium.launch({ headless: true });

  try {
    const context = await browser.newContext({
      viewport: { width: 1920, height: 1080 },
      locale: 'zh-CN',
      timezoneId: 'Asia/Shanghai',
      userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    });

    await context.addCookies(cookies);
    const page = await context.newPage();

    let isLoggedIn = false;
    let checkCompleted = false;

    // The listener must be attached before navigating so the response is not missed.
    page.on('response', async (response) => {
      if (!response.url().includes(DOUYIN_API.CHECK_USER)) {
        return;
      }
      try {
        const data = await response.json();
        // result: true means logged in
        isLoggedIn = data?.result === true && data?.status_code === 0;
        checkCompleted = true;
        logger.info(`[Douyin] check/user API response: result=${data?.result}, status_code=${data?.status_code}, isLoggedIn=${isLoggedIn}`);
      } catch {
        // Body could not be read as JSON; keep waiting and rely on the URL fallback.
      }
    });

    // Loading the creator home page triggers the check/user request.
    await page.goto(DOUYIN_API.CREATOR_HOME, {
      waitUntil: 'domcontentloaded',
      timeout: 30000,
    });

    // Poll until the listener has a verdict or 10 seconds have passed.
    const deadline = Date.now() + 10000;
    while (!checkCompleted && Date.now() < deadline) {
      await page.waitForTimeout(500);
    }

    // No check/user response: decide from whether we were redirected to a login page.
    if (!checkCompleted) {
      const currentUrl = page.url();
      isLoggedIn = !currentUrl.includes('login') && !currentUrl.includes('passport');
      logger.info(`[Douyin] No check/user response, fallback to URL check: ${currentUrl}, isLoggedIn=${isLoggedIn}`);
    }

    return isLoggedIn;
  } catch (error) {
    logger.error('[Douyin] checkDouyinLoginByApi error:', error);
    return false;
  } finally {
    // Single cleanup point: closing the browser also disposes its contexts and
    // pages. A failure here must not change the result computed above.
    await browser.close().catch(() => undefined);
  }
}
|
|
|
+
|
|
|
+ /**
|
|
|
* 获取账号信息(使用无头浏览器)
|
|
|
*/
|
|
|
async fetchAccountInfo(platform: PlatformType, cookies: CookieData[]): Promise<AccountInfo> {
|
|
|
@@ -219,7 +326,9 @@ class HeadlessBrowserService {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * 获取抖音账号信息
|
|
|
+ * 获取抖音账号信息 - 通过 API 方式获取
|
|
|
+ * 1. 监听 check/user 接口验证登录状态
|
|
|
+ * 2. 通过 work_list API 获取作品数和作品列表
|
|
|
*/
|
|
|
private async fetchDouyinAccountInfo(
|
|
|
page: Page,
|
|
|
@@ -232,6 +341,21 @@ class HeadlessBrowserService {
|
|
|
let fansCount = 0;
|
|
|
let worksCount = 0;
|
|
|
let worksList: WorkItem[] = [];
|
|
|
+ let isLoggedIn = false;
|
|
|
+
|
|
|
+ // 用于存储从 API 捕获的数据
|
|
|
+ const capturedData: {
|
|
|
+ userInfo?: { nickname?: string; avatar?: string; uid?: string; sec_uid?: string; follower_count?: number };
|
|
|
+ worksList?: Array<{
|
|
|
+ awemeId: string;
|
|
|
+ title: string;
|
|
|
+ coverUrl: string;
|
|
|
+ duration: number;
|
|
|
+ createTime: number;
|
|
|
+ statistics: { play_count: number; digg_count: number; comment_count: number; share_count: number };
|
|
|
+ }>;
|
|
|
+ total?: number;
|
|
|
+ } = {};
|
|
|
|
|
|
try {
|
|
|
// 从 Cookie 获取用户 ID
|
|
|
@@ -242,15 +366,90 @@ class HeadlessBrowserService {
|
|
|
accountId = `douyin_${uidCookie.value}`;
|
|
|
}
|
|
|
|
|
|
- // 访问主页获取基本信息
|
|
|
- await page.goto('https://creator.douyin.com/creator-micro/home', {
|
|
|
+ // 设置 API 响应监听器
|
|
|
+ page.on('response', async (response) => {
|
|
|
+ const url = response.url();
|
|
|
+ try {
|
|
|
+ // 监听 check/user 接口 - 验证登录状态
|
|
|
+ if (url.includes(DOUYIN_API.CHECK_USER)) {
|
|
|
+ const data = await response.json();
|
|
|
+ isLoggedIn = data?.result === true && data?.status_code === 0;
|
|
|
+ logger.info(`[Douyin API] check/user: isLoggedIn=${isLoggedIn}`);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 监听 work_list 接口 - 获取作品列表
|
|
|
+ if (url.includes('/work_list') || url.includes('/janus/douyin/creator/pc/work_list')) {
|
|
|
+ const data = await response.json();
|
|
|
+ if (data?.aweme_list) {
|
|
|
+ // 获取总数
|
|
|
+ if (data.total !== undefined) {
|
|
|
+ capturedData.total = data.total;
|
|
|
+ }
|
|
|
+ // 解析作品列表
|
|
|
+ capturedData.worksList = data.aweme_list.map((aweme: Record<string, unknown>) => {
|
|
|
+ const statistics = aweme.statistics as Record<string, unknown> || {};
|
|
|
+ const cover = aweme.Cover as { url_list?: string[] } || aweme.video as { cover?: { url_list?: string[] } };
|
|
|
+ const coverUrl = cover?.url_list?.[0] || (cover as { cover?: { url_list?: string[] } })?.cover?.url_list?.[0] || '';
|
|
|
+
|
|
|
+ return {
|
|
|
+ awemeId: String(aweme.aweme_id || ''),
|
|
|
+ title: String(aweme.item_title || aweme.desc || '').split('\n')[0].slice(0, 50) || '无标题',
|
|
|
+ coverUrl,
|
|
|
+ duration: Number(aweme.duration || 0),
|
|
|
+ createTime: Number(aweme.create_time || 0),
|
|
|
+ statistics: {
|
|
|
+ play_count: Number(statistics.play_count || 0),
|
|
|
+ digg_count: Number(statistics.digg_count || 0),
|
|
|
+ comment_count: Number(statistics.comment_count || 0),
|
|
|
+ share_count: Number(statistics.share_count || 0),
|
|
|
+ },
|
|
|
+ };
|
|
|
+ });
|
|
|
+ logger.info(`[Douyin API] work_list: total=${capturedData.total}, items=${capturedData.worksList?.length}`);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 监听账号信息接口
|
|
|
+ if (url.includes('/account_base_info') || url.includes('/user/info')) {
|
|
|
+ const data = await response.json();
|
|
|
+ if (data?.user || data?.data?.user) {
|
|
|
+ const user = data.user || data.data?.user || {};
|
|
|
+ capturedData.userInfo = {
|
|
|
+ nickname: user.nickname || user.name,
|
|
|
+ avatar: user.avatar_url || user.avatar_thumb?.url_list?.[0],
|
|
|
+ uid: user.uid || user.user_id,
|
|
|
+ sec_uid: user.sec_uid,
|
|
|
+ follower_count: user.follower_count || user.fans_count,
|
|
|
+ };
|
|
|
+ logger.info(`[Douyin API] user info: nickname=${capturedData.userInfo.nickname}`);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch {
|
|
|
+ // 忽略非 JSON 响应
|
|
|
+ }
|
|
|
+ });
|
|
|
+
|
|
|
+ // 访问主页获取基本信息并触发 check/user 接口
|
|
|
+ logger.info('[Douyin] Navigating to creator home...');
|
|
|
+ await page.goto(DOUYIN_API.CREATOR_HOME, {
|
|
|
waitUntil: 'domcontentloaded',
|
|
|
timeout: 30000,
|
|
|
});
|
|
|
|
|
|
await page.waitForTimeout(3000);
|
|
|
|
|
|
- // 使用 JavaScript 提取信息
|
|
|
+ // 检查登录状态 - 如果没有从 API 获取到,通过 URL 判断
|
|
|
+ if (!isLoggedIn) {
|
|
|
+ const currentUrl = page.url();
|
|
|
+ isLoggedIn = !currentUrl.includes('login') && !currentUrl.includes('passport');
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!isLoggedIn) {
|
|
|
+ logger.warn('[Douyin] Not logged in, returning default account info');
|
|
|
+ return { accountId, accountName, avatarUrl, fansCount, worksCount, worksList };
|
|
|
+ }
|
|
|
+
|
|
|
+ // 从页面提取基本账号信息(作为 API 数据的补充)
|
|
|
const accountData = await page.evaluate(() => {
|
|
|
const result: { name?: string; avatar?: string; fans?: number; douyinId?: string } = {};
|
|
|
|
|
|
@@ -317,117 +516,52 @@ class HeadlessBrowserService {
|
|
|
return result;
|
|
|
});
|
|
|
|
|
|
- if (accountData.douyinId) {
|
|
|
+ // 优先使用 API 数据,否则使用页面数据
|
|
|
+ if (capturedData.userInfo?.uid) {
|
|
|
+ accountId = `douyin_${capturedData.userInfo.uid}`;
|
|
|
+ } else if (accountData.douyinId) {
|
|
|
accountId = `douyin_${accountData.douyinId}`;
|
|
|
}
|
|
|
- if (accountData.name) {
|
|
|
- accountName = accountData.name;
|
|
|
- }
|
|
|
- if (accountData.avatar) {
|
|
|
- avatarUrl = accountData.avatar;
|
|
|
- }
|
|
|
- if (accountData.fans !== undefined) {
|
|
|
- fansCount = accountData.fans;
|
|
|
- }
|
|
|
-
|
|
|
- // 访问内容管理页面获取作品数和作品列表
|
|
|
- try {
|
|
|
- await page.goto('https://creator.douyin.com/creator-micro/content/manage', {
|
|
|
- waitUntil: 'domcontentloaded',
|
|
|
- timeout: 30000,
|
|
|
- });
|
|
|
-
|
|
|
- await page.waitForTimeout(3000);
|
|
|
-
|
|
|
- // 获取作品总数
|
|
|
- const totalEl = await page.$('[class*="content-header-total"]');
|
|
|
- if (totalEl) {
|
|
|
- const totalText = await totalEl.textContent();
|
|
|
- if (totalText) {
|
|
|
- const match = totalText.match(/(\d+)/);
|
|
|
- if (match) {
|
|
|
- worksCount = parseInt(match[1], 10);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // 获取作品列表
|
|
|
- worksList = await page.evaluate(() => {
|
|
|
- const items: {
|
|
|
- videoId?: string;
|
|
|
- title: string;
|
|
|
- coverUrl: string;
|
|
|
- duration: string;
|
|
|
- publishTime: string;
|
|
|
- status: string;
|
|
|
- playCount: number;
|
|
|
- likeCount: number;
|
|
|
- commentCount: number;
|
|
|
- shareCount: number;
|
|
|
- }[] = [];
|
|
|
-
|
|
|
- const cards = document.querySelectorAll('[class*="video-card-zQ02ng"]');
|
|
|
-
|
|
|
- cards.forEach((card: Element) => {
|
|
|
- try {
|
|
|
- const coverEl = card.querySelector('[class*="video-card-cover"]') as HTMLElement | null;
|
|
|
- let coverUrl = '';
|
|
|
- if (coverEl && coverEl.style.backgroundImage) {
|
|
|
- const match = coverEl.style.backgroundImage.match(/url\("(.+?)"\)/);
|
|
|
- if (match) {
|
|
|
- coverUrl = match[1];
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- const durationEl = card.querySelector('[class*="badge-"]');
|
|
|
- const duration = durationEl?.textContent?.trim() || '';
|
|
|
-
|
|
|
- const titleEl = card.querySelector('[class*="info-title-text"]');
|
|
|
- const title = titleEl?.textContent?.trim() || '无作品描述';
|
|
|
-
|
|
|
- const timeEl = card.querySelector('[class*="info-time"]');
|
|
|
- const publishTime = timeEl?.textContent?.trim() || '';
|
|
|
-
|
|
|
- const statusEl = card.querySelector('[class*="info-status"]');
|
|
|
- const status = statusEl?.textContent?.trim() || '';
|
|
|
-
|
|
|
- const metricItems = card.querySelectorAll('[class*="metric-item-u1CAYE"]');
|
|
|
- let playCount = 0, likeCount = 0, commentCount = 0, shareCount = 0;
|
|
|
|
|
|
- metricItems.forEach((metric: Element) => {
|
|
|
- const labelEl = metric.querySelector('[class*="metric-label"]');
|
|
|
- const valueEl = metric.querySelector('[class*="metric-value"]');
|
|
|
- const label = labelEl?.textContent?.trim() || '';
|
|
|
- const value = parseInt(valueEl?.textContent?.trim() || '0', 10);
|
|
|
-
|
|
|
- switch (label) {
|
|
|
- case '播放': playCount = value; break;
|
|
|
- case '点赞': likeCount = value; break;
|
|
|
- case '评论': commentCount = value; break;
|
|
|
- case '分享': shareCount = value; break;
|
|
|
- }
|
|
|
- });
|
|
|
-
|
|
|
- items.push({
|
|
|
- title,
|
|
|
- coverUrl,
|
|
|
- duration,
|
|
|
- publishTime,
|
|
|
- status,
|
|
|
- playCount,
|
|
|
- likeCount,
|
|
|
- commentCount,
|
|
|
- shareCount,
|
|
|
- });
|
|
|
- } catch { }
|
|
|
- });
|
|
|
-
|
|
|
- return items;
|
|
|
- });
|
|
|
-
|
|
|
- logger.info(`Douyin works: total ${worksCount}, fetched ${worksList.length} items`);
|
|
|
- } catch (worksError) {
|
|
|
- logger.warn('Failed to fetch Douyin works list:', worksError);
|
|
|
+ accountName = capturedData.userInfo?.nickname || accountData.name || accountName;
|
|
|
+ avatarUrl = capturedData.userInfo?.avatar || accountData.avatar || avatarUrl;
|
|
|
+ fansCount = capturedData.userInfo?.follower_count || accountData.fans || fansCount;
|
|
|
+
|
|
|
+ // 通过 API 获取作品列表
|
|
|
+ logger.info('[Douyin] Fetching works via API...');
|
|
|
+ const apiWorks = await this.fetchWorksDirectApi(page);
|
|
|
+
|
|
|
+ if (apiWorks.length > 0) {
|
|
|
+ worksCount = apiWorks.length;
|
|
|
+ worksList = apiWorks.map(w => ({
|
|
|
+ videoId: w.awemeId,
|
|
|
+ title: w.title,
|
|
|
+ coverUrl: w.coverUrl,
|
|
|
+ duration: '00:00',
|
|
|
+ publishTime: w.createTime ? new Date(w.createTime * 1000).toISOString() : '',
|
|
|
+ status: 'published',
|
|
|
+ playCount: 0,
|
|
|
+ likeCount: 0,
|
|
|
+ commentCount: w.commentCount,
|
|
|
+ shareCount: 0,
|
|
|
+ }));
|
|
|
+ logger.info(`[Douyin] Got ${worksCount} works from API`);
|
|
|
+ } else if (capturedData.worksList && capturedData.worksList.length > 0) {
|
|
|
+ // 如果直接 API 调用失败,使用监听到的数据
|
|
|
+ worksCount = capturedData.total || capturedData.worksList.length;
|
|
|
+ worksList = capturedData.worksList.map(w => ({
|
|
|
+ videoId: w.awemeId,
|
|
|
+ title: w.title,
|
|
|
+ coverUrl: w.coverUrl,
|
|
|
+ duration: this.formatDuration(w.duration),
|
|
|
+ publishTime: w.createTime ? new Date(w.createTime * 1000).toISOString() : '',
|
|
|
+ status: 'published',
|
|
|
+ playCount: w.statistics.play_count,
|
|
|
+ likeCount: w.statistics.digg_count,
|
|
|
+ commentCount: w.statistics.comment_count,
|
|
|
+ shareCount: w.statistics.share_count,
|
|
|
+ }));
|
|
|
+ logger.info(`[Douyin] Got ${worksCount} works from intercepted API data`);
|
|
|
}
|
|
|
|
|
|
} catch (error) {
|
|
|
@@ -438,6 +572,17 @@ class HeadlessBrowserService {
|
|
|
}
|
|
|
|
|
|
/**
 * Formats a duration in milliseconds as zero-padded `MM:SS`.
 *
 * Minutes are not folded into hours, so long durations yield more than two
 * minute digits (e.g. `125:03`). Missing, non-finite, zero or negative input
 * yields `00:00`.
 *
 * @param ms Duration in milliseconds.
 * @returns The formatted duration.
 */
private formatDuration(ms: number): string {
  if (!Number.isFinite(ms) || ms <= 0) return '00:00';
  const totalSeconds = Math.floor(ms / 1000);
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return `${minutes.toString().padStart(2, '0')}:${seconds.toString().padStart(2, '0')}`;
}
|
|
|
+
|
|
|
+ /**
|
|
|
* 获取B站账号信息
|
|
|
*/
|
|
|
private async fetchBilibiliAccountInfo(
|
|
|
@@ -593,6 +738,1467 @@ class HeadlessBrowserService {
|
|
|
worksCount: 0,
|
|
|
};
|
|
|
}
|
|
|
+
|
|
|
/**
 * Scrapes comments from the Douyin creator comment-management page, one work
 * at a time.
 *
 * Flow:
 * 1. Open the comment page with the given cookies installed.
 * 2. If no "选择作品" (select work) control is found, return whatever
 *    comments the page currently shows as a single entry with id `current`.
 * 3. Otherwise open the work picker, enumerate the works it lists, and for
 *    each one: click it, confirm if a confirm button exists, extract the
 *    comments, and reopen the picker for the next work.
 * 4. If the picker lists no works, fall back to the comments currently shown.
 *
 * Works without comments are omitted from the result. A work's `videoId` is
 * taken from its `data-video-id` / `data-id` attribute and falls back to the
 * placeholder `video_<index>` when neither exists.
 *
 * @param cookies Cookies added to the browser context; `domain` defaults to
 *                `.douyin.com` and `path` to `/` when absent.
 * @returns Comments grouped by work. On error the groups collected so far are
 *          returned; the error is logged, not rethrown.
 */
async fetchDouyinComments(cookies: CookieData[]): Promise<WorkComments[]> {
  // One selector list is used both to enumerate works and to click them, so
  // that index i refers to the same element in both places.
  const workItemSelector = '[class*="video-card"], [class*="work-item"], [class*="content-item"]';

  const browser = await chromium.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  const allWorkComments: WorkComments[] = [];

  try {
    const context = await browser.newContext({
      viewport: { width: 1920, height: 1080 },
      userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    });

    // Install cookies
    const playwrightCookies = cookies.map(c => ({
      name: c.name,
      value: c.value,
      domain: c.domain || '.douyin.com',
      path: c.path || '/',
    }));
    await context.addCookies(playwrightCookies);

    const page = await context.newPage();

    // Open the comment management page
    logger.info('Navigating to Douyin comment management page...');
    await page.goto('https://creator.douyin.com/creator-micro/interactive/comment', {
      waitUntil: 'domcontentloaded',
      timeout: 30000,
    });

    await page.waitForTimeout(3000);

    // Locate the "select work" control
    logger.info('Looking for "选择作品" button...');
    const selectWorkBtn = await page.$('button:has-text("选择作品"), [class*="select"]:has-text("选择作品"), div:has-text("选择作品")');

    if (!selectWorkBtn) {
      // No picker: a work may already be selected, so read what is on screen.
      logger.info('No "选择作品" button found, fetching current comments...');
      const currentComments = await this.extractCommentsFromPage(page);
      if (currentComments.length > 0) {
        allWorkComments.push({
          videoId: 'current',
          videoTitle: '当前作品',
          videoCoverUrl: '',
          comments: currentComments,
        });
      }
      return allWorkComments;
    }

    await selectWorkBtn.click();
    await page.waitForTimeout(2000);
    logger.info('Clicked "选择作品" button');

    // Enumerate the works listed in the picker (runs in the page).
    const worksList = await page.evaluate((selector: string) => {
      const works: Array<{ videoId: string; title: string; coverUrl: string }> = [];

      document.querySelectorAll(selector).forEach((item, index) => {
        const titleEl = item.querySelector('[class*="title"], [class*="desc"]');
        const coverEl = item.querySelector('img, [class*="cover"]');

        const title = titleEl?.textContent?.trim() || `作品 ${index + 1}`;
        let coverUrl = '';

        if (coverEl) {
          // Either an <img> with src, or an element with a CSS background url(...).
          coverUrl = (coverEl as HTMLImageElement).src ||
            coverEl.getAttribute('style')?.match(/url\(['"]?([^'")\s]+)['"]?\)/)?.[1] || '';
        }

        const videoId = item.getAttribute('data-video-id') ||
          item.getAttribute('data-id') ||
          `video_${index}`;

        works.push({ videoId, title, coverUrl });
      });

      return works;
    }, workItemSelector);

    logger.info(`Found ${worksList.length} works in the selector`);

    if (worksList.length > 0) {
      for (const [i, work] of worksList.entries()) {
        try {
          logger.info(`Processing work ${i + 1}/${worksList.length}: ${work.title}`);

          // Re-query on every pass: the picker is re-rendered each time it is reopened.
          const workItems = await page.$$(workItemSelector);
          const workItem = workItems[i];
          if (workItem) {
            await workItem.click();
            await page.waitForTimeout(2000);

            // Confirm the selection when the picker asks for it
            const confirmBtn = await page.$('button:has-text("确定"), button:has-text("确认")');
            if (confirmBtn) {
              await confirmBtn.click();
              await page.waitForTimeout(2000);
            }
          }

          // Give the comment list time to load
          await page.waitForTimeout(2000);

          const comments = await this.extractCommentsFromPage(page);

          if (comments.length > 0) {
            allWorkComments.push({
              videoId: work.videoId,
              videoTitle: work.title,
              videoCoverUrl: work.coverUrl,
              comments,
            });
            logger.info(`Extracted ${comments.length} comments for work: ${work.title}`);
          }

          // Reopen the picker when there are more works to visit
          if (i < worksList.length - 1) {
            const selectBtn = await page.$('button:has-text("选择作品"), [class*="select"]:has-text("选择作品")');
            if (selectBtn) {
              await selectBtn.click();
              await page.waitForTimeout(2000);
            }
          }
        } catch (workError) {
          // One failing work must not abort the remaining ones.
          logger.warn(`Failed to process work ${i + 1}:`, workError);
        }
      }
    } else {
      // Picker listed nothing: fall back to the comments currently shown.
      const comments = await this.extractCommentsFromPage(page);
      if (comments.length > 0) {
        const currentWork = await page.evaluate(() => {
          const titleEl = document.querySelector('[class*="video-title"], [class*="content-title"]');
          const coverEl = document.querySelector('[class*="video-cover"] img, [class*="cover"] img');

          return {
            title: titleEl?.textContent?.trim() || '当前作品',
            coverUrl: (coverEl as HTMLImageElement)?.src || '',
          };
        });

        allWorkComments.push({
          videoId: 'current',
          videoTitle: currentWork.title,
          videoCoverUrl: currentWork.coverUrl,
          comments,
        });
      }
    }

    logger.info(`Total: fetched comments from ${allWorkComments.length} works`);
    return allWorkComments;
  } catch (error) {
    logger.error('Error fetching Douyin comments:', error);
    return allWorkComments;
  } finally {
    // Single cleanup point for every exit path; closing the browser also
    // disposes its contexts and pages. A close failure must not mask the result.
    await browser.close().catch(() => undefined);
  }
}
|
|
|
+
|
|
|
/**
 * Extracts the comments currently rendered on the page.
 *
 * Runs entirely inside the page via `page.evaluate`. Elements are located by
 * class-name fragments, since the full class names carry a generated suffix
 * (e.g. `container-sXKyMs`, `username-aLgaNB`):
 * - comment container: `container-`
 * - author name: `username-`
 * - time: `time-`
 * - text: `comment-content-text-`
 * - avatar: `avatar-` (its inner `<img>`)
 * - like count: first `item-` inside `operations-`
 *
 * Comments with empty text are skipped, and comments are de-duplicated on
 * author name + text. `authorId` is set to the author name because no id is
 * read from the DOM. `commentTime` is the raw text of the time element.
 *
 * NOTE(review): `commentId` is the first 20 base64 characters of the
 * URI-encoded string `text[0..30) + author + time`, i.e. it is derived from
 * only the first 15 encoded bytes. Comments that open with the same few
 * characters therefore share an id. The scheme is left as is so that ids
 * already produced stay stable; confirm with consumers before changing it.
 *
 * @param page Page that is showing the comment list.
 * @returns The extracted comments, in DOM order.
 */
private async extractCommentsFromPage(page: Page): Promise<CommentItem[]> {
  // No named helper functions are declared inside the callback on purpose:
  // the function is serialized into the page, so it only relies on inline code.
  return page.evaluate(() => {
    const comments: Array<{
      commentId: string;
      authorId: string;
      authorName: string;
      authorAvatar: string;
      content: string;
      likeCount: number;
      commentTime: string;
    }> = [];

    const seenContents = new Set<string>();

    // Strategy 1: any "container-" element that holds both a username and a
    // comment text. Outer wrappers can match too; de-duplication below drops
    // the repeats they produce.
    const commentContainers: Element[] = Array.from(
      document.querySelectorAll('[class*="container-"]'),
    ).filter(
      (candidate) =>
        candidate.querySelector('[class*="username-"]') &&
        candidate.querySelector('[class*="comment-content-text-"]'),
    );

    console.log(`Found ${commentContainers.length} comment containers`);

    // Strategy 2: start from each comment text and walk up (at most 10
    // levels) to the nearest "container-" ancestor.
    if (commentContainers.length === 0) {
      const contentElements = document.querySelectorAll('[class*="comment-content-text-"]');
      console.log(`Found ${contentElements.length} content elements, searching parents...`);

      for (const contentEl of Array.from(contentElements)) {
        let ancestor = contentEl.parentElement;
        for (let depth = 0; depth < 10 && ancestor; depth++) {
          const className = ancestor.className || '';
          if (className.includes('container-')) {
            if (!commentContainers.includes(ancestor)) {
              commentContainers.push(ancestor);
            }
            break;
          }
          ancestor = ancestor.parentElement;
        }
      }
    }

    console.log(`Total comment containers: ${commentContainers.length}`);

    commentContainers.forEach((container, index) => {
      try {
        const authorName =
          container.querySelector('[class*="username-"]')?.textContent?.trim() || '未知用户';

        const authorAvatar =
          container.querySelector('[class*="avatar-"]')?.querySelector('img')?.src || '';

        const commentTime =
          container.querySelector('[class*="time-"]')?.textContent?.trim() || '';

        const content =
          container.querySelector('[class*="comment-content-text-"]')?.textContent?.trim() || '';

        if (!content) {
          console.log(`[${index}] Skipping empty content`);
          return;
        }

        // De-duplicate on author + text
        const contentKey = `${authorName}||${content}`;
        if (seenContents.has(contentKey)) {
          console.log(`[${index}] Skipping duplicate: ${authorName} - ${content.slice(0, 20)}`);
          return;
        }
        seenContents.add(contentKey);

        // The first "item-" under "operations-" is taken to be the like count.
        let likeCount = 0;
        const likeText =
          container.querySelector('[class*="operations-"]')?.querySelector('[class*="item-"]')?.textContent || '';
        const likeMatch = likeText.match(/(\d+)/);
        if (likeMatch) {
          likeCount = parseInt(likeMatch[1], 10);
        }

        // Build the id. slice(0, 30) counts UTF-16 code units and can cut a
        // surrogate pair (e.g. an emoji) in half; encodeURIComponent throws
        // URIError on a lone surrogate, which used to drop the whole comment.
        // Trim a dangling high surrogate before encoding.
        let idPrefix = content.slice(0, 30);
        const lastUnit = idPrefix.charCodeAt(idPrefix.length - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
          idPrefix = idPrefix.slice(0, -1);
        }
        const contentHash = idPrefix + authorName + commentTime;
        const commentId = `dy_${btoa(encodeURIComponent(contentHash)).slice(0, 20)}`;

        comments.push({
          commentId,
          authorId: authorName,
          authorName,
          authorAvatar,
          content,
          likeCount,
          commentTime,
        });

        console.log(`[${index}] Extracted: ${authorName} - ${content.slice(0, 30)} (${commentTime})`);
      } catch (err) {
        // A malformed entry must not abort extraction of the others.
        console.error(`[${index}] Error extracting comment:`, err);
      }
    });

    console.log(`Successfully extracted ${comments.length} comments`);
    return comments;
  });
}
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 获取抖音评论 - 通过监听 API 请求 (推荐方式)
|
|
|
+ * 使用无头浏览器,通过拦截网络请求直接获取 API 数据
|
|
|
+ * 更稳定、更高效
|
|
|
+ */
|
|
|
+ async fetchDouyinCommentsByApiInterception(cookies: CookieData[]): Promise<WorkComments[]> {
|
|
|
+ const browser = await chromium.launch({
|
|
|
+ headless: true, // 无头模式
|
|
|
+ args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
|
|
+ });
|
|
|
+
|
|
|
+ const allWorkComments: WorkComments[] = [];
|
|
|
+ // 存储捕获的 API 数据
|
|
|
+ const capturedWorks: Array<{
|
|
|
+ awemeId: string;
|
|
|
+ title: string;
|
|
|
+ coverUrl: string;
|
|
|
+ commentCount: number;
|
|
|
+ }> = [];
|
|
|
+ const capturedComments: Map<string, CommentItem[]> = new Map();
|
|
|
+
|
|
|
+ try {
|
|
|
+ const context = await browser.newContext({
|
|
|
+ viewport: { width: 1920, height: 1080 },
|
|
|
+ userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
|
+ });
|
|
|
+
|
|
|
+ // 设置 Cookie
|
|
|
+ const playwrightCookies = cookies.map(c => ({
|
|
|
+ name: c.name,
|
|
|
+ value: c.value,
|
|
|
+ domain: c.domain || '.douyin.com',
|
|
|
+ path: c.path || '/',
|
|
|
+ }));
|
|
|
+ await context.addCookies(playwrightCookies);
|
|
|
+ logger.info(`[API Interception] Set ${playwrightCookies.length} cookies`);
|
|
|
+
|
|
|
+ const page = await context.newPage();
|
|
|
+
|
|
|
+ // 监听网络响应
|
|
|
+ page.on('response', async (response) => {
|
|
|
+ const url = response.url();
|
|
|
+
|
|
|
+ try {
|
|
|
+ // 监听作品列表 API - 支持新旧两种接口
|
|
|
+ // 新接口: /janus/douyin/creator/pc/work_list (aweme_list 字段)
|
|
|
+ // 旧接口: /creator/item/list (item_info_list 字段)
|
|
|
+ if (url.includes('/work_list') || url.includes('/creator/item/list')) {
|
|
|
+ const data = await response.json();
|
|
|
+
|
|
|
+ // 新接口: aweme_list
|
|
|
+ if (data?.aweme_list && data.aweme_list.length > 0) {
|
|
|
+ for (const aweme of data.aweme_list) {
|
|
|
+ const awemeId = String(aweme.aweme_id || '');
|
|
|
+ if (!awemeId) continue;
|
|
|
+
|
|
|
+ const statistics = aweme.statistics || {};
|
|
|
+ const commentCount = parseInt(String(statistics.comment_count || '0'), 10);
|
|
|
+
|
|
|
+ let title = aweme.item_title || '';
|
|
|
+ if (!title) {
|
|
|
+ const desc = aweme.desc || aweme.caption || '';
|
|
|
+ title = desc.split('\n')[0].slice(0, 50) || '无标题';
|
|
|
+ }
|
|
|
+
|
|
|
+ let coverUrl = '';
|
|
|
+ if (aweme.Cover?.url_list?.length > 0) {
|
|
|
+ coverUrl = aweme.Cover.url_list[0];
|
|
|
+ } else if (aweme.video?.cover?.url_list?.length > 0) {
|
|
|
+ coverUrl = aweme.video.cover.url_list[0];
|
|
|
+ }
|
|
|
+
|
|
|
+ capturedWorks.push({
|
|
|
+ awemeId,
|
|
|
+ title,
|
|
|
+ coverUrl,
|
|
|
+ commentCount,
|
|
|
+ });
|
|
|
+ }
|
|
|
+ logger.info(`[API] Captured ${data.aweme_list.length} works from work_list API`);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 旧接口: item_info_list
|
|
|
+ const itemList = data?.item_info_list || data?.item_list || [];
|
|
|
+ if (itemList.length > 0) {
|
|
|
+ for (const item of itemList) {
|
|
|
+ capturedWorks.push({
|
|
|
+ awemeId: item.item_id_plain || item.aweme_id || '',
|
|
|
+ title: item.title || '无标题',
|
|
|
+ coverUrl: item.cover_image_url || '',
|
|
|
+ commentCount: item.comment_count || 0,
|
|
|
+ });
|
|
|
+ }
|
|
|
+ logger.info(`[API] Captured ${itemList.length} works from item/list API`);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 监听评论列表 API(两种格式)
|
|
|
+ // 格式1: /comment/list/select/ - 初始加载,返回 { comments: [...] }
|
|
|
+ // 格式2: /creator/comment/list/ - 切换作品后,返回 { comment_info_list: [...] }
|
|
|
+ if (url.includes('/comment/list') || url.includes('/comment/read')) {
|
|
|
+ const data = await response.json();
|
|
|
+ // 从 URL 中提取 aweme_id
|
|
|
+ const awemeIdMatch = url.match(/aweme_id=(\d+)/);
|
|
|
+ const awemeId = awemeIdMatch?.[1] || '';
|
|
|
+
|
|
|
+ let comments: CommentItem[] = [];
|
|
|
+
|
|
|
+ // 格式1: 初始加载的评论 API (comment/list/select)
|
|
|
+ if (data?.comments && Array.isArray(data.comments) && data.comments.length > 0) {
|
|
|
+ comments = data.comments.map((c: Record<string, unknown>) => {
|
|
|
+ const user = c.user as Record<string, unknown> | undefined;
|
|
|
+ const avatarThumb = user?.avatar_thumb as Record<string, unknown> | undefined;
|
|
|
+ const avatarUrls = avatarThumb?.url_list as string[] | undefined;
|
|
|
+
|
|
|
+ return {
|
|
|
+ commentId: String(c.cid || ''),
|
|
|
+ authorId: String(user?.uid || ''),
|
|
|
+ authorName: String(user?.nickname || '匿名'),
|
|
|
+ authorAvatar: avatarUrls?.[0] || '',
|
|
|
+ content: String(c.text || ''),
|
|
|
+ likeCount: Number(c.digg_count || 0),
|
|
|
+ commentTime: new Date(Number(c.create_time || 0) * 1000).toISOString(),
|
|
|
+ videoId: String(c.aweme_id || awemeId),
|
|
|
+ };
|
|
|
+ });
|
|
|
+ logger.info(`[API] Format1 (comments): Captured ${comments.length} comments`);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 格式2: 切换作品后的评论 API (creator/comment/list)
|
|
|
+ if (data?.comment_info_list && Array.isArray(data.comment_info_list) && data.comment_info_list.length > 0) {
|
|
|
+ comments = data.comment_info_list.map((c: Record<string, unknown>) => {
|
|
|
+ const userInfo = c.user_info as Record<string, unknown> | undefined;
|
|
|
+
|
|
|
+ return {
|
|
|
+ commentId: String(c.comment_id || ''),
|
|
|
+ authorId: String(userInfo?.user_id || ''),
|
|
|
+ authorName: String(userInfo?.screen_name || '匿名'),
|
|
|
+ authorAvatar: String(userInfo?.avatar_url || ''),
|
|
|
+ content: String(c.text || ''),
|
|
|
+ likeCount: Number(c.digg_count || 0),
|
|
|
+ commentTime: new Date(Number(c.create_time || 0) * 1000).toISOString(),
|
|
|
+ videoId: awemeId, // 这种格式没有直接返回 aweme_id
|
|
|
+ };
|
|
|
+ });
|
|
|
+ logger.info(`[API] Format2 (comment_info_list): Captured ${comments.length} comments`);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (comments.length > 0) {
|
|
|
+ const videoId = comments[0]?.videoId || awemeId;
|
|
|
+ if (videoId) {
|
|
|
+ const existing = capturedComments.get(videoId) || [];
|
|
|
+ capturedComments.set(videoId, [...existing, ...comments]);
|
|
|
+ logger.info(`[API] Total captured ${comments.length} comments for aweme ${videoId}`);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch {
|
|
|
+ // 忽略非 JSON 响应
|
|
|
+ }
|
|
|
+ });
|
|
|
+
|
|
|
+ // 导航到创作者中心页面(设置好 Cookie 后)
|
|
|
+ logger.info('[API Interception] Navigating to creator page...');
|
|
|
+ await page.goto('https://creator.douyin.com/creator-micro/home', {
|
|
|
+ waitUntil: 'domcontentloaded',
|
|
|
+ timeout: 60000,
|
|
|
+ });
|
|
|
+
|
|
|
+ // 等待页面加载
|
|
|
+ await page.waitForTimeout(3000);
|
|
|
+
|
|
|
+ // 检查是否需要登录
|
|
|
+ const currentUrl = page.url();
|
|
|
+ if (currentUrl.includes('login') || currentUrl.includes('passport')) {
|
|
|
+ logger.warn('[API Interception] Cookie expired');
|
|
|
+ return [];
|
|
|
+ }
|
|
|
+
|
|
|
+ // 方式1:直接调用 API 获取作品列表(优先)
|
|
|
+ logger.info('[API Interception] Fetching works via direct API...');
|
|
|
+ let works = await this.fetchWorksDirectApi(page);
|
|
|
+
|
|
|
+ // 方式2:如果直接调用失败,尝试通过页面触发 API
|
|
|
+ if (works.length === 0) {
|
|
|
+ logger.info('[API Interception] Direct API failed, trying page navigation...');
|
|
|
+ await page.goto('https://creator.douyin.com/creator-micro/interactive/comment', {
|
|
|
+ waitUntil: 'networkidle',
|
|
|
+ timeout: 60000,
|
|
|
+ });
|
|
|
+ await page.waitForTimeout(3000);
|
|
|
+
|
|
|
+ // 点击"选择作品"按钮触发作品列表 API
|
|
|
+ const selectBtn = await page.$('button:has-text("选择作品"), [class*="select"]:has-text("选择")');
|
|
|
+ if (selectBtn) {
|
|
|
+ await selectBtn.click();
|
|
|
+ await page.waitForTimeout(3000);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 使用监听到的数据
|
|
|
+ works = capturedWorks;
|
|
|
+ }
|
|
|
+
|
|
|
+ logger.info(`[API Interception] Total works: ${works.length}`);
|
|
|
+
|
|
|
+ // 获取有评论的作品
|
|
|
+ const worksWithComments = works.filter(w => w.commentCount > 0);
|
|
|
+ logger.info(`[API Interception] Works with comments: ${worksWithComments.length}`);
|
|
|
+
|
|
|
+ // 如果有评论的作品,需要导航到评论管理页面并逐个切换获取
|
|
|
+ if (worksWithComments.length > 0) {
|
|
|
+ // 先尝试直接调用 API 获取评论
|
|
|
+ for (const work of worksWithComments) {
|
|
|
+ logger.info(`[API Interception] Trying direct API for: ${work.title.slice(0, 30)}... (${work.commentCount} comments)`);
|
|
|
+
|
|
|
+ let comments = capturedComments.get(work.awemeId) || [];
|
|
|
+
|
|
|
+ if (comments.length === 0) {
|
|
|
+ comments = await this.fetchCommentsDirectApi(page, work.awemeId);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (comments.length > 0) {
|
|
|
+ allWorkComments.push({
|
|
|
+ videoId: work.awemeId,
|
|
|
+ videoTitle: work.title,
|
|
|
+ videoCoverUrl: work.coverUrl,
|
|
|
+ comments,
|
|
|
+ });
|
|
|
+ logger.info(`[API Interception] Got ${comments.length} comments for ${work.awemeId} via direct API`);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 如果直接 API 没有获取到足够的评论,使用页面交互方式
|
|
|
+ const worksNeedingPageMethod = worksWithComments.filter(w => {
|
|
|
+ const found = allWorkComments.find(wc => wc.videoId === w.awemeId);
|
|
|
+ return !found || found.comments.length === 0;
|
|
|
+ });
|
|
|
+
|
|
|
+ if (worksNeedingPageMethod.length > 0) {
|
|
|
+ logger.info(`[API Interception] ${worksNeedingPageMethod.length} works need page interaction method`);
|
|
|
+
|
|
|
+ // works 是所有作品的列表(包括没有评论的),弹窗中的顺序应该和这个一致
|
|
|
+ // worksWithComments 是有评论的作品列表
|
|
|
+ logger.info(`[Page Method] All works: ${works.length}, works with comments: ${worksWithComments.length}`);
|
|
|
+ logger.info(`[Page Method] Works with comments IDs: ${worksWithComments.map(w => w.awemeId).join(', ')}`);
|
|
|
+
|
|
|
+ // 构建作品索引映射:在所有作品列表中,每个有评论的作品的索引是多少
|
|
|
+ const workIndexMap = new Map<string, number>();
|
|
|
+ works.forEach((w, idx) => {
|
|
|
+ if (w.commentCount > 0) {
|
|
|
+ workIndexMap.set(w.awemeId, idx);
|
|
|
+ }
|
|
|
+ });
|
|
|
+ logger.info(`[Page Method] Work index map: ${JSON.stringify(Object.fromEntries(workIndexMap))}`);
|
|
|
+
|
|
|
+ // 导航到评论管理页面
|
|
|
+ logger.info('[Page Method] Navigating to comment management page...');
|
|
|
+ await page.goto('https://creator.douyin.com/creator-micro/interactive/comment', {
|
|
|
+ waitUntil: 'domcontentloaded',
|
|
|
+ timeout: 60000,
|
|
|
+ });
|
|
|
+
|
|
|
+ // 等待页面加载
|
|
|
+ await page.waitForTimeout(5000);
|
|
|
+
|
|
|
+ // 用于存储最新捕获的评论和 aweme_id
|
|
|
+ const latestHolder: { comments: CommentItem[]; awemeId: string } = { comments: [], awemeId: '' };
|
|
|
+
|
|
|
+ // 设置监听器 - 捕获评论 API 响应
|
|
|
+ page.on('response', async (response) => {
|
|
|
+ const url = response.url();
|
|
|
+ if (url.includes('/comment/list') || url.includes('/comment/read')) {
|
|
|
+ try {
|
|
|
+ const jsonData = await response.json();
|
|
|
+ let parsedComments: CommentItem[] = [];
|
|
|
+ let capturedAwemeId = '';
|
|
|
+
|
|
|
+ // 从 URL 中提取 aweme_id(格式1有)
|
|
|
+ const awemeIdMatch = url.match(/aweme_id=(\d+)/);
|
|
|
+ capturedAwemeId = awemeIdMatch?.[1] || '';
|
|
|
+
|
|
|
+ // 格式1: { comments: [...] }
|
|
|
+ if (jsonData?.comments && Array.isArray(jsonData.comments) && jsonData.comments.length > 0) {
|
|
|
+ // 从评论中提取 aweme_id
|
|
|
+ const firstComment = jsonData.comments[0] as Record<string, unknown>;
|
|
|
+ if (!capturedAwemeId && firstComment.aweme_id) {
|
|
|
+ capturedAwemeId = String(firstComment.aweme_id);
|
|
|
+ }
|
|
|
+
|
|
|
+ parsedComments = jsonData.comments.map((c: Record<string, unknown>) => ({
|
|
|
+ commentId: String((c as { cid?: string }).cid || ''),
|
|
|
+ authorId: String(((c as { user?: { uid?: string } }).user)?.uid || ''),
|
|
|
+ authorName: String(((c as { user?: { nickname?: string } }).user)?.nickname || '匿名'),
|
|
|
+ authorAvatar: ((c as { user?: { avatar_thumb?: { url_list?: string[] } } }).user)?.avatar_thumb?.url_list?.[0] || '',
|
|
|
+ content: String((c as { text?: string }).text || ''),
|
|
|
+ likeCount: Number((c as { digg_count?: number }).digg_count || 0),
|
|
|
+ commentTime: new Date(Number((c as { create_time?: number }).create_time || 0) * 1000).toISOString(),
|
|
|
+ videoId: capturedAwemeId,
|
|
|
+ }));
|
|
|
+ logger.info(`[Comment API] Format1: ${parsedComments.length} comments, aweme_id: ${capturedAwemeId}`);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 格式2: { comment_info_list: [...] }
|
|
|
+ if (jsonData?.comment_info_list && Array.isArray(jsonData.comment_info_list) && jsonData.comment_info_list.length > 0) {
|
|
|
+ parsedComments = jsonData.comment_info_list.map((c: Record<string, unknown>) => {
|
|
|
+ const userInfo = c.user_info as Record<string, unknown> | undefined;
|
|
|
+ return {
|
|
|
+ commentId: String(c.comment_id || ''),
|
|
|
+ authorId: String(userInfo?.user_id || ''),
|
|
|
+ authorName: String(userInfo?.screen_name || '匿名'),
|
|
|
+ authorAvatar: String(userInfo?.avatar_url || ''),
|
|
|
+ content: String(c.text || ''),
|
|
|
+ likeCount: Number(c.digg_count || 0),
|
|
|
+ commentTime: new Date(Number(c.create_time || 0) * 1000).toISOString(),
|
|
|
+ videoId: '', // 格式2没有aweme_id,稍后填充
|
|
|
+ };
|
|
|
+ });
|
|
|
+ logger.info(`[Comment API] Format2: ${parsedComments.length} comments (no aweme_id)`);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (parsedComments.length > 0) {
|
|
|
+ latestHolder.comments = parsedComments;
|
|
|
+ latestHolder.awemeId = capturedAwemeId;
|
|
|
+ }
|
|
|
+ } catch {
|
|
|
+ // 忽略
|
|
|
+ }
|
|
|
+ }
|
|
|
+ });
|
|
|
+
|
|
|
+ // 等待第一个作品的评论加载(格式1,有 aweme_id)
|
|
|
+ await page.waitForTimeout(4000);
|
|
|
+
|
|
|
+ // 处理第一个作品(页面加载时自动显示,格式1有aweme_id可以直接匹配)
|
|
|
+ if (latestHolder.comments.length > 0 && latestHolder.awemeId) {
|
|
|
+ const matchedWork = works.find(w => w.awemeId === latestHolder.awemeId);
|
|
|
+ if (matchedWork) {
|
|
|
+ allWorkComments.push({
|
|
|
+ videoId: matchedWork.awemeId,
|
|
|
+ videoTitle: matchedWork.title,
|
|
|
+ videoCoverUrl: matchedWork.coverUrl,
|
|
|
+ comments: latestHolder.comments,
|
|
|
+ });
|
|
|
+ logger.info(`[Page Method] First work (by aweme_id): ${latestHolder.comments.length} comments for ${matchedWork.awemeId}`);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 遍历其他有评论的作品
|
|
|
+ for (const work of worksWithComments) {
|
|
|
+ // 检查是否已获取
|
|
|
+ const existing = allWorkComments.find(wc => wc.videoId === work.awemeId);
|
|
|
+ if (existing) {
|
|
|
+ logger.info(`[Page Method] Skip ${work.awemeId}, already got ${existing.comments.length} comments`);
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 获取该作品在所有作品列表中的索引
|
|
|
+ const workIndex = workIndexMap.get(work.awemeId);
|
|
|
+ if (workIndex === undefined) {
|
|
|
+ logger.warn(`[Page Method] Work ${work.awemeId} not found in index map`);
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ logger.info(`[Page Method] Processing work: ${work.title.slice(0, 20)}... (awemeId: ${work.awemeId}, index: ${workIndex})`);
|
|
|
+
|
|
|
+ try {
|
|
|
+ // 清空之前的数据
|
|
|
+ latestHolder.comments = [];
|
|
|
+ latestHolder.awemeId = '';
|
|
|
+
|
|
|
+ // 点击"选择作品"按钮
|
|
|
+ await page.click('button:has-text("选择作品")');
|
|
|
+ await page.waitForTimeout(2000);
|
|
|
+
|
|
|
+ // 找到弹窗中的作品图片列表
|
|
|
+ const workImages = await page.$$('[role="dialog"] img[src*="douyinpic"], .douyin-creator-interactive-sidesheet-inner img[src*="douyinpic"]');
|
|
|
+ logger.info(`[Page Method] Found ${workImages.length} work images in dialog`);
|
|
|
+
|
|
|
+ if (workIndex < workImages.length) {
|
|
|
+ // 点击对应索引的作品
|
|
|
+ await workImages[workIndex].click();
|
|
|
+ logger.info(`[Page Method] Clicked work image at index ${workIndex}`);
|
|
|
+
|
|
|
+ // 等待评论 API 响应
|
|
|
+ await page.waitForTimeout(4000);
|
|
|
+
|
|
|
+ // 获取评论
|
|
|
+ if (latestHolder.comments.length > 0) {
|
|
|
+ // 使用当前 work 的 awemeId
|
|
|
+ const comments = latestHolder.comments.map(c => ({
|
|
|
+ ...c,
|
|
|
+ videoId: work.awemeId,
|
|
|
+ }));
|
|
|
+ allWorkComments.push({
|
|
|
+ videoId: work.awemeId,
|
|
|
+ videoTitle: work.title,
|
|
|
+ videoCoverUrl: work.coverUrl,
|
|
|
+ comments,
|
|
|
+ });
|
|
|
+ logger.info(`[Page Method] Got ${comments.length} comments for ${work.awemeId}`);
|
|
|
+ } else {
|
|
|
+ // 尝试从页面提取
|
|
|
+ const pageComments = await this.extractCommentsFromPage(page);
|
|
|
+ if (pageComments.length > 0) {
|
|
|
+ const comments = pageComments.map(c => ({ ...c, videoId: work.awemeId }));
|
|
|
+ allWorkComments.push({
|
|
|
+ videoId: work.awemeId,
|
|
|
+ videoTitle: work.title,
|
|
|
+ videoCoverUrl: work.coverUrl,
|
|
|
+ comments,
|
|
|
+ });
|
|
|
+ logger.info(`[Page Method] Extracted ${comments.length} comments from page`);
|
|
|
+ } else {
|
|
|
+ logger.warn(`[Page Method] No comments for ${work.awemeId}`);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ logger.warn(`[Page Method] Index ${workIndex} out of range, only ${workImages.length} images`);
|
|
|
+ await page.keyboard.press('Escape');
|
|
|
+ }
|
|
|
+
|
|
|
+ await page.waitForTimeout(1000);
|
|
|
+ } catch (e) {
|
|
|
+ logger.warn(`[Page Method] Error for work ${work.awemeId}:`, e);
|
|
|
+ await page.keyboard.press('Escape').catch(() => { });
|
|
|
+ await page.waitForTimeout(500);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ logger.info(`[API Interception] Total result: ${allWorkComments.length} works with comments`);
|
|
|
+ await context.close();
|
|
|
+ } catch (error) {
|
|
|
+ logger.error('[API Interception] Error:', error);
|
|
|
+ } finally {
|
|
|
+ await browser.close();
|
|
|
+ }
|
|
|
+
|
|
|
+ return allWorkComments;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 从页面获取当前作品的 videoId
|
|
|
+ */
|
|
|
+ private async getCurrentVideoIdFromPage(page: Page): Promise<string | null> {
|
|
|
+ try {
|
|
|
+ // 尝试从页面 URL 或 DOM 中提取 aweme_id
|
|
|
+ const videoId = await page.evaluate(() => {
|
|
|
+ // 方法1: 从 URL 中提取
|
|
|
+ const url = window.location.href;
|
|
|
+ const urlMatch = url.match(/aweme_id=(\d+)/);
|
|
|
+ if (urlMatch) return urlMatch[1];
|
|
|
+
|
|
|
+ // 方法2: 从页面元素中提取 (如果有的话)
|
|
|
+ const dataEl = document.querySelector('[data-aweme-id]');
|
|
|
+ if (dataEl) {
|
|
|
+ return dataEl.getAttribute('data-aweme-id');
|
|
|
+ }
|
|
|
+
|
|
|
+ return null;
|
|
|
+ });
|
|
|
+
|
|
|
+ return videoId;
|
|
|
+ } catch {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 直接调用评论 API 获取数据
|
|
|
+ */
|
|
|
+ private async fetchCommentsDirectApi(page: Page, awemeId: string): Promise<CommentItem[]> {
|
|
|
+ const comments: CommentItem[] = [];
|
|
|
+
|
|
|
+ try {
|
|
|
+ // 使用 page.evaluate 在页面上下文中调用 fetch API
|
|
|
+ const data = await page.evaluate(async (videoId) => {
|
|
|
+ const url = `https://creator.douyin.com/web/api/third_party/aweme/api/comment/read/aweme/v1/web/comment/list/select/?aweme_id=${videoId}&cursor=0&count=50&comment_select_options=0&sort_options=0&channel_id=618&app_id=2906&aid=2906&device_platform=webapp`;
|
|
|
+
|
|
|
+ const resp = await fetch(url, {
|
|
|
+ credentials: 'include',
|
|
|
+ headers: {
|
|
|
+ 'Accept': 'application/json',
|
|
|
+ },
|
|
|
+ });
|
|
|
+
|
|
|
+ return resp.json();
|
|
|
+ }, awemeId);
|
|
|
+
|
|
|
+ if (data?.comments && Array.isArray(data.comments)) {
|
|
|
+ for (const c of data.comments) {
|
|
|
+ comments.push({
|
|
|
+ commentId: String(c.cid || ''),
|
|
|
+ authorId: String(c.user?.uid || ''),
|
|
|
+ authorName: String(c.user?.nickname || '匿名'),
|
|
|
+ authorAvatar: c.user?.avatar_thumb?.url_list?.[0] || '',
|
|
|
+ content: String(c.text || ''),
|
|
|
+ likeCount: Number(c.digg_count || 0),
|
|
|
+ commentTime: new Date(Number(c.create_time || 0) * 1000).toISOString(),
|
|
|
+ videoId: String(c.aweme_id || awemeId),
|
|
|
+ });
|
|
|
+ }
|
|
|
+ logger.info(`[DirectAPI] Fetched ${comments.length} comments for ${awemeId}`);
|
|
|
+ }
|
|
|
+ } catch (e) {
|
|
|
+ logger.warn(`[DirectAPI] Failed to fetch comments for ${awemeId}:`, e);
|
|
|
+ }
|
|
|
+
|
|
|
+ return comments;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 直接调用抖音 API 获取作品列表
|
|
|
+ * 使用新的 work_list 接口,支持分页加载
|
|
|
+ */
|
|
|
+ private async fetchWorksDirectApi(page: Page): Promise<Array<{
|
|
|
+ awemeId: string;
|
|
|
+ title: string;
|
|
|
+ coverUrl: string;
|
|
|
+ commentCount: number;
|
|
|
+ createTime?: number;
|
|
|
+ }>> {
|
|
|
+ const works: Array<{
|
|
|
+ awemeId: string;
|
|
|
+ title: string;
|
|
|
+ coverUrl: string;
|
|
|
+ commentCount: number;
|
|
|
+ createTime?: number;
|
|
|
+ }> = [];
|
|
|
+
|
|
|
+ try {
|
|
|
+ let hasMore = true;
|
|
|
+ let maxCursor = 0;
|
|
|
+ let pageCount = 0;
|
|
|
+ const maxPages = 20; // 最多加载20页,防止无限循环
|
|
|
+
|
|
|
+ while (hasMore && pageCount < maxPages) {
|
|
|
+ pageCount++;
|
|
|
+ logger.info(`[DirectAPI] Fetching works page ${pageCount}, cursor: ${maxCursor}`);
|
|
|
+
|
|
|
+ const data = await page.evaluate(async (cursor: number) => {
|
|
|
+ // 使用新的 work_list API 接口
|
|
|
+ const url = `https://creator.douyin.com/janus/douyin/creator/pc/work_list?scene=star_atlas&device_platform=android&status=0&count=12&max_cursor=${cursor}&cookie_enabled=true&browser_language=zh-CN&browser_platform=Win32&browser_name=Mozilla&browser_online=true&timezone_name=Asia%2FShanghai&aid=1128`;
|
|
|
+
|
|
|
+ const resp = await fetch(url, {
|
|
|
+ credentials: 'include',
|
|
|
+ headers: {
|
|
|
+ 'Accept': 'application/json',
|
|
|
+ },
|
|
|
+ });
|
|
|
+
|
|
|
+ return resp.json();
|
|
|
+ }, maxCursor);
|
|
|
+
|
|
|
+ // 解析 aweme_list 中的作品数据
|
|
|
+ const awemeList = data?.aweme_list || [];
|
|
|
+ logger.info(`[DirectAPI] Page ${pageCount}: got ${awemeList.length} works from aweme_list`);
|
|
|
+
|
|
|
+ for (const aweme of awemeList) {
|
|
|
+ const awemeId = String(aweme.aweme_id || '');
|
|
|
+ if (!awemeId) continue;
|
|
|
+
|
|
|
+ // 从 statistics 中获取评论数
|
|
|
+ const statistics = aweme.statistics || {};
|
|
|
+ const commentCount = parseInt(String(statistics.comment_count || '0'), 10);
|
|
|
+
|
|
|
+ // 获取标题:优先使用 item_title,其次使用 desc(描述)
|
|
|
+ let title = aweme.item_title || '';
|
|
|
+ if (!title) {
|
|
|
+ // 从 desc 中提取标题(取第一行或前50个字符)
|
|
|
+ const desc = aweme.desc || aweme.caption || '';
|
|
|
+ title = desc.split('\n')[0].slice(0, 50) || '无标题';
|
|
|
+ }
|
|
|
+
|
|
|
+ // 获取封面 URL:从 Cover.url_list 或 video.cover.url_list 中获取
|
|
|
+ let coverUrl = '';
|
|
|
+ if (aweme.Cover?.url_list?.length > 0) {
|
|
|
+ coverUrl = aweme.Cover.url_list[0];
|
|
|
+ } else if (aweme.video?.cover?.url_list?.length > 0) {
|
|
|
+ coverUrl = aweme.video.cover.url_list[0];
|
|
|
+ }
|
|
|
+
|
|
|
+ works.push({
|
|
|
+ awemeId,
|
|
|
+ title,
|
|
|
+ coverUrl,
|
|
|
+ commentCount,
|
|
|
+ createTime: aweme.create_time,
|
|
|
+ });
|
|
|
+ }
|
|
|
+
|
|
|
+ // 检查是否有更多数据
|
|
|
+ hasMore = data?.has_more === true;
|
|
|
+
|
|
|
+ // 更新游标:使用返回的 max_cursor 或基于最后一个作品的 create_time
|
|
|
+ if (data?.max_cursor) {
|
|
|
+ maxCursor = data.max_cursor;
|
|
|
+ } else if (awemeList.length > 0) {
|
|
|
+ // 如果没有 max_cursor,使用最后一个作品的 create_time 作为游标
|
|
|
+ const lastAweme = awemeList[awemeList.length - 1];
|
|
|
+ if (lastAweme.create_time) {
|
|
|
+ maxCursor = lastAweme.create_time * 1000; // 转换为毫秒
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 如果没有获取到数据,停止循环
|
|
|
+ if (awemeList.length === 0) {
|
|
|
+ hasMore = false;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 稍微延迟,避免请求过快
|
|
|
+ if (hasMore) {
|
|
|
+ await new Promise(resolve => setTimeout(resolve, 500));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ logger.info(`[DirectAPI] Total fetched ${works.length} works from ${pageCount} pages`);
|
|
|
+ } catch (e) {
|
|
|
+ logger.warn('[DirectAPI] Failed to fetch works:', e);
|
|
|
+ }
|
|
|
+
|
|
|
+ return works;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 获取抖音评论 (旧版 - DOM解析方式)
|
|
|
+ * 模拟点击"选择作品"按钮,依次点击作品获取评论
|
|
|
+ * 作为备用方案
|
|
|
+ */
|
|
|
+ async fetchDouyinCommentsViaApi(cookies: CookieData[]): Promise<WorkComments[]> {
|
|
|
+ // 优先使用 API 拦截方式
|
|
|
+ const result = await this.fetchDouyinCommentsByApiInterception(cookies);
|
|
|
+ if (result.length > 0) {
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 如果 API 方式失败,使用旧的 DOM 解析方式作为备用
|
|
|
+ logger.info('[Fallback] Using DOM parsing method...');
|
|
|
+
|
|
|
+ const browser = await chromium.launch({
|
|
|
+ headless: true, // 改为无头模式
|
|
|
+ args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
|
|
+ });
|
|
|
+
|
|
|
+ const allWorkComments: WorkComments[] = [];
|
|
|
+
|
|
|
+ try {
|
|
|
+ const context = await browser.newContext({
|
|
|
+ viewport: { width: 1920, height: 1080 },
|
|
|
+ userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
|
+ });
|
|
|
+
|
|
|
+ // 设置 Cookie
|
|
|
+ const playwrightCookies = cookies.map(c => ({
|
|
|
+ name: c.name,
|
|
|
+ value: c.value,
|
|
|
+ domain: c.domain || '.douyin.com',
|
|
|
+ path: c.path || '/',
|
|
|
+ }));
|
|
|
+ await context.addCookies(playwrightCookies);
|
|
|
+ logger.info(`Set ${playwrightCookies.length} cookies`);
|
|
|
+
|
|
|
+ const page = await context.newPage();
|
|
|
+
|
|
|
+ // 导航到评论管理页面
|
|
|
+ logger.info('Navigating to Douyin comment management page...');
|
|
|
+ await page.goto('https://creator.douyin.com/creator-micro/interactive/comment', {
|
|
|
+ waitUntil: 'domcontentloaded',
|
|
|
+ timeout: 60000,
|
|
|
+ });
|
|
|
+
|
|
|
+ // 等待页面完全加载
|
|
|
+ logger.info('Waiting for page to fully load...');
|
|
|
+ await page.waitForTimeout(5000);
|
|
|
+
|
|
|
+ // 检查是否需要登录
|
|
|
+ const currentUrl = page.url();
|
|
|
+ logger.info(`Current URL: ${currentUrl}`);
|
|
|
+ if (currentUrl.includes('login') || currentUrl.includes('passport')) {
|
|
|
+ logger.warn('Cookie expired, need re-login');
|
|
|
+ await browser.close();
|
|
|
+ return allWorkComments;
|
|
|
+ }
|
|
|
+
|
|
|
+ logger.info('Page loaded successfully');
|
|
|
+
|
|
|
+ // 等待评论列表加载
|
|
|
+ logger.info('Waiting for comments to load...');
|
|
|
+ try {
|
|
|
+ await page.waitForSelector('[class*="comment-content-text-"]', { timeout: 10000 });
|
|
|
+ logger.info('Comments loaded');
|
|
|
+ } catch {
|
|
|
+ logger.warn('No comments found on initial load, will try to select works');
|
|
|
+ }
|
|
|
+
|
|
|
+ // 辅助函数:从当前页面提取评论
|
|
|
+ const extractCommentsFromCurrentPage = async (): Promise<CommentItem[]> => {
|
|
|
+ logger.info('Extracting comments from current page...');
|
|
|
+
|
|
|
+ // 滚动页面加载所有评论
|
|
|
+ await page.evaluate(async () => {
|
|
|
+ // 滚动多次加载更多评论
|
|
|
+ for (let i = 0; i < 10; i++) {
|
|
|
+ window.scrollBy(0, 500);
|
|
|
+ await new Promise(r => setTimeout(r, 800));
|
|
|
+ }
|
|
|
+ window.scrollTo(0, 0);
|
|
|
+ });
|
|
|
+
|
|
|
+ await page.waitForTimeout(2000);
|
|
|
+
|
|
|
+ // 使用精确选择器提取评论
|
|
|
+ const comments = await page.evaluate(() => {
|
|
|
+ const result: Array<{
|
|
|
+ commentId: string;
|
|
|
+ authorId: string;
|
|
|
+ authorName: string;
|
|
|
+ authorAvatar: string;
|
|
|
+ content: string;
|
|
|
+ likeCount: number;
|
|
|
+ commentTime: string;
|
|
|
+ }> = [];
|
|
|
+
|
|
|
+ const seenContents = new Set<string>();
|
|
|
+
|
|
|
+ // 查找所有评论容器:包含 username 和 comment-content-text 的 container
|
|
|
+ const allContainers = document.querySelectorAll('[class*="container-"]');
|
|
|
+ const commentContainers: Element[] = [];
|
|
|
+
|
|
|
+ allContainers.forEach(container => {
|
|
|
+ const hasUsername = container.querySelector('[class*="username-"]');
|
|
|
+ const hasCommentContent = container.querySelector('[class*="comment-content-text-"]');
|
|
|
+ if (hasUsername && hasCommentContent) {
|
|
|
+ commentContainers.push(container);
|
|
|
+ }
|
|
|
+ });
|
|
|
+
|
|
|
+ console.log(`Found ${commentContainers.length} comment containers`);
|
|
|
+
|
|
|
+ commentContainers.forEach((container, index) => {
|
|
|
+ try {
|
|
|
+ // 用户名
|
|
|
+ const usernameEl = container.querySelector('[class*="username-"]');
|
|
|
+ const authorName = usernameEl?.textContent?.trim() || '未知用户';
|
|
|
+
|
|
|
+ // 时间
|
|
|
+ const timeEl = container.querySelector('[class*="time-"]');
|
|
|
+ const commentTime = timeEl?.textContent?.trim() || '';
|
|
|
+
|
|
|
+ // 评论内容
|
|
|
+ const contentEl = container.querySelector('[class*="comment-content-text-"]');
|
|
|
+ const content = contentEl?.textContent?.trim() || '';
|
|
|
+
|
|
|
+ if (!content) return;
|
|
|
+
|
|
|
+ // 头像
|
|
|
+ const avatarContainer = container.querySelector('[class*="avatar-"]');
|
|
|
+ const avatarImg = avatarContainer?.querySelector('img');
|
|
|
+ const authorAvatar = avatarImg?.src || '';
|
|
|
+
|
|
|
+ // 去重
|
|
|
+ const key = `${authorName}||${content}`;
|
|
|
+ if (seenContents.has(key)) return;
|
|
|
+ seenContents.add(key);
|
|
|
+
|
|
|
+ // 点赞数
|
|
|
+ let likeCount = 0;
|
|
|
+ const opsEl = container.querySelector('[class*="operations-"]');
|
|
|
+ if (opsEl) {
|
|
|
+ const itemEl = opsEl.querySelector('[class*="item-"]');
|
|
|
+ if (itemEl) {
|
|
|
+ const match = itemEl.textContent?.match(/(\d+)/);
|
|
|
+ if (match) likeCount = parseInt(match[1], 10);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 生成 ID
|
|
|
+ const hash = content.slice(0, 30) + authorName + commentTime;
|
|
|
+ const commentId = `dy_${btoa(encodeURIComponent(hash)).slice(0, 20)}`;
|
|
|
+
|
|
|
+ result.push({
|
|
|
+ commentId,
|
|
|
+ authorId: authorName,
|
|
|
+ authorName,
|
|
|
+ authorAvatar,
|
|
|
+ content,
|
|
|
+ likeCount,
|
|
|
+ commentTime,
|
|
|
+ });
|
|
|
+
|
|
|
+ console.log(`[${index}] ${authorName}: ${content.slice(0, 30)}`);
|
|
|
+ } catch (e) {
|
|
|
+ console.error(`Error at ${index}:`, e);
|
|
|
+ }
|
|
|
+ });
|
|
|
+
|
|
|
+ return result;
|
|
|
+ });
|
|
|
+
|
|
|
+ logger.info(`Extracted ${comments.length} comments`);
|
|
|
+ return comments;
|
|
|
+ };
|
|
|
+
|
|
|
+ // 辅助函数:获取当前显示的作品标题
|
|
|
+ const getCurrentWorkTitle = async (): Promise<string> => {
|
|
|
+ return page.evaluate(() => {
|
|
|
+ // 查找作品标题 - 通常在页面顶部区域
|
|
|
+ // 排除筛选器和按钮中的文本
|
|
|
+ const excludeTexts = ['全部评论', '最新发布', '全部人群', '搜索', '选择作品', '评论管理'];
|
|
|
+
|
|
|
+ // 方法1: 查找视频信息区域
|
|
|
+ const videoInfoSelectors = [
|
|
|
+ '[class*="video-info"] [class*="title"]',
|
|
|
+ '[class*="work-info"] [class*="title"]',
|
|
|
+ '[class*="content-info"] [class*="title"]',
|
|
|
+ ];
|
|
|
+
|
|
|
+ for (const selector of videoInfoSelectors) {
|
|
|
+ const el = document.querySelector(selector);
|
|
|
+ if (el?.textContent) {
|
|
|
+ const text = el.textContent.trim();
|
|
|
+ if (text.length > 5 && !excludeTexts.some(e => text.includes(e))) {
|
|
|
+ return text;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 方法2: 查找页面上较长的标题文本
|
|
|
+ const allTexts = document.querySelectorAll('div, span, p');
|
|
|
+ for (const el of Array.from(allTexts)) {
|
|
|
+ const text = el.textContent?.trim() || '';
|
|
|
+ if (text.length > 20 &&
|
|
|
+ text.length < 200 &&
|
|
|
+ !excludeTexts.some(e => text.includes(e)) &&
|
|
|
+ !el.closest('button') &&
|
|
|
+ !el.closest('[class*="select"]') &&
|
|
|
+ !el.closest('[class*="filter"]')) {
|
|
|
+ // 检查是否可能是作品标题(通常包含特定字符或格式)
|
|
|
+ if (text.includes('#') || text.match(/[,。!?、]/)) {
|
|
|
+ return text;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return '';
|
|
|
+ });
|
|
|
+ };
|
|
|
+
|
|
|
+ // 步骤1: 先获取当前页面显示的评论(默认显示的第一个作品)
|
|
|
+ logger.info('Step 1: Getting comments from default view...');
|
|
|
+ const defaultTitle = await getCurrentWorkTitle();
|
|
|
+ const defaultComments = await extractCommentsFromCurrentPage();
|
|
|
+
|
|
|
+ if (defaultComments.length > 0) {
|
|
|
+ allWorkComments.push({
|
|
|
+ videoId: `video_${Date.now()}`,
|
|
|
+ videoTitle: defaultTitle || '默认作品',
|
|
|
+ videoCoverUrl: '',
|
|
|
+ comments: defaultComments,
|
|
|
+ });
|
|
|
+ logger.info(`Got ${defaultComments.length} comments from default view, title: "${defaultTitle.slice(0, 50)}"`);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 步骤2: 尝试点击"选择作品"按钮获取更多作品的评论
|
|
|
+ logger.info('Step 2: Looking for "选择作品" button...');
|
|
|
+
|
|
|
+ // 使用 locator 查找按钮
|
|
|
+ const selectBtn = page.locator('text=选择作品').first();
|
|
|
+ const btnCount = await selectBtn.count();
|
|
|
+ logger.info(`Found ${btnCount} "选择作品" button(s)`);
|
|
|
+
|
|
|
+ if (btnCount > 0) {
|
|
|
+ logger.info('Clicking "选择作品" button...');
|
|
|
+ await selectBtn.click();
|
|
|
+
|
|
|
+ // 等待更长时间,确保弹窗完全加载
|
|
|
+ logger.info('Waiting for work list modal to appear...');
|
|
|
+ await page.waitForTimeout(5000);
|
|
|
+
|
|
|
+ // 打印当前页面状态,帮助调试
|
|
|
+ const modalInfo = await page.evaluate(() => {
|
|
|
+ // 查找所有可能的弹窗元素
|
|
|
+ const modals = document.querySelectorAll('[class*="modal"], [class*="popup"], [class*="drawer"], [class*="dialog"], [role="dialog"]');
|
|
|
+ const modalClasses = Array.from(modals).map(m => m.className).slice(0, 5);
|
|
|
+
|
|
|
+ // 查找所有图片(作品封面)
|
|
|
+ const images = document.querySelectorAll('img[src*="douyinpic"]');
|
|
|
+
|
|
|
+ // 查找所有可能的卡片元素
|
|
|
+ const cards = document.querySelectorAll('[class*="card"], [class*="item"]');
|
|
|
+ const cardClasses = Array.from(cards).map(c => c.className).slice(0, 10);
|
|
|
+
|
|
|
+ return {
|
|
|
+ modalCount: modals.length,
|
|
|
+ modalClasses,
|
|
|
+ imageCount: images.length,
|
|
|
+ cardCount: cards.length,
|
|
|
+ cardClasses,
|
|
|
+ };
|
|
|
+ });
|
|
|
+
|
|
|
+ logger.info(`Modal debug: ${JSON.stringify(modalInfo)}`);
|
|
|
+
|
|
|
+ // 尝试多种选择器查找作品列表
|
|
|
+ const workSelectors = [
|
|
|
+ '[class*="video-card"]',
|
|
|
+ '[class*="work-item"]',
|
|
|
+ '[class*="content-item"]',
|
|
|
+ '[class*="modal"] [class*="card"]',
|
|
|
+ '[class*="modal"] img',
|
|
|
+ '[class*="drawer"] [class*="card"]',
|
|
|
+ '[class*="drawer"] img',
|
|
|
+ '[role="dialog"] [class*="card"]',
|
|
|
+ '[role="dialog"] img',
|
|
|
+ '[class*="popup"] img',
|
|
|
+ 'img[src*="douyinpic"]', // 直接找抖音图片
|
|
|
+ ];
|
|
|
+
|
|
|
+ let workElements: Awaited<ReturnType<typeof page.$$>> = [];
|
|
|
+ let usedSelector = '';
|
|
|
+
|
|
|
+ for (const selector of workSelectors) {
|
|
|
+ const elements = await page.$$(selector);
|
|
|
+ logger.info(`Selector "${selector}" found ${elements.length} elements`);
|
|
|
+ if (elements.length > 0 && elements.length < 50) { // 避免选中太多无关元素
|
|
|
+ workElements = elements;
|
|
|
+ usedSelector = selector;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ logger.info(`Using selector "${usedSelector}", found ${workElements.length} work items`);
|
|
|
+
|
|
|
+ if (workElements.length > 0) {
|
|
|
+ // 首先获取所有作品的评论数信息
|
|
|
+ // 根据 HTML 结构:
|
|
|
+ // - 作品项容器: div.container-Lkxos9 (类名可能变化,使用 [class*="container-"])
|
|
|
+ // - 标题: div.title-LUOP3b (类名可能变化,使用 [class*="title-"])
|
|
|
+ // - 评论数: div.right-os7ZB9 > div (类名可能变化,使用 [class*="right-"] > div)
|
|
|
+ const workInfoList = await page.evaluate(() => {
|
|
|
+ const works: Array<{ index: number; title: string; commentCount: number }> = [];
|
|
|
+
|
|
|
+ // 查找作品列表容器中的所有作品项
|
|
|
+ // 根据用户提供的 HTML,作品项的类名是 container-Lkxos9
|
|
|
+ const workContainers = document.querySelectorAll('[role="dialog"] [class*="container-"]');
|
|
|
+
|
|
|
+ console.log(`Found ${workContainers.length} work containers`);
|
|
|
+
|
|
|
+ workContainers.forEach((container, index) => {
|
|
|
+ // 检查是否包含图片(确认是作品项而不是其他容器)
|
|
|
+ const img = container.querySelector('img[src*="douyinpic"]');
|
|
|
+ if (!img) {
|
|
|
+ console.log(`Container ${index} has no douyinpic image, skipping`);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 提取标题
|
|
|
+ const titleEl = container.querySelector('[class*="title-"]');
|
|
|
+ const title = titleEl?.textContent?.trim() || `作品 ${works.length + 1}`;
|
|
|
+
|
|
|
+ // 提取评论数 - 在 right- 容器的最后一个 div 中
|
|
|
+ let commentCount = 0;
|
|
|
+ const rightContainer = container.querySelector('[class*="right-"]');
|
|
|
+ if (rightContainer) {
|
|
|
+ // 获取 right 容器下的所有直接 div 子元素
|
|
|
+ const divs = rightContainer.querySelectorAll(':scope > div');
|
|
|
+ if (divs.length > 0) {
|
|
|
+ // 最后一个 div 包含评论数
|
|
|
+ const lastDiv = divs[divs.length - 1];
|
|
|
+ const text = lastDiv.textContent?.trim() || '0';
|
|
|
+ const num = parseInt(text, 10);
|
|
|
+ if (!isNaN(num)) {
|
|
|
+ commentCount = num;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ console.log(`Work ${works.length}: title="${title.slice(0, 30)}...", commentCount=${commentCount}`);
|
|
|
+
|
|
|
+ works.push({
|
|
|
+ index: works.length,
|
|
|
+ title: title.slice(0, 100),
|
|
|
+ commentCount
|
|
|
+ });
|
|
|
+ });
|
|
|
+
|
|
|
+ // 如果上面的选择器没找到,尝试备用方法
|
|
|
+ if (works.length === 0) {
|
|
|
+ console.log('Primary selector failed, trying fallback...');
|
|
|
+ // 直接查找包含 douyinpic 图片的元素的父容器
|
|
|
+ const images = document.querySelectorAll('[role="dialog"] img[src*="douyinpic"]');
|
|
|
+ images.forEach((img, index) => {
|
|
|
+ // 向上查找到作品项容器
|
|
|
+ let container = img.parentElement;
|
|
|
+ while (container && !container.classList.toString().includes('container-')) {
|
|
|
+ container = container.parentElement;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (container) {
|
|
|
+ const titleEl = container.querySelector('[class*="title-"]');
|
|
|
+ const title = titleEl?.textContent?.trim() || `作品 ${index + 1}`;
|
|
|
+
|
|
|
+ // 查找评论数
|
|
|
+ let commentCount = 0;
|
|
|
+ const rightEl = container.querySelector('[class*="right-"]');
|
|
|
+ if (rightEl) {
|
|
|
+ const text = rightEl.textContent?.trim() || '';
|
|
|
+ // 提取最后出现的数字
|
|
|
+ const matches = text.match(/\d+/g);
|
|
|
+ if (matches && matches.length > 0) {
|
|
|
+ commentCount = parseInt(matches[matches.length - 1], 10);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ works.push({ index, title: title.slice(0, 100), commentCount });
|
|
|
+ }
|
|
|
+ });
|
|
|
+ }
|
|
|
+
|
|
|
+ return works;
|
|
|
+ });
|
|
|
+
|
|
|
+ logger.info(`Work info list (${workInfoList.length} items): ${JSON.stringify(workInfoList)}`);
|
|
|
+
|
|
|
+ // 过滤出评论数 > 0 的作品,或者评论数未知(-1)的作品
|
|
|
+ const worksWithComments = workInfoList.filter(w => w.commentCount > 0 || w.commentCount === -1);
|
|
|
+ logger.info(`Found ${worksWithComments.length} works with comments > 0 or unknown (out of ${workInfoList.length})`);
|
|
|
+
|
|
|
+ // 如果所有作品评论数都是0,则不处理任何作品
|
|
|
+ const allZero = workInfoList.every(w => w.commentCount === 0);
|
|
|
+ if (allZero) {
|
|
|
+ logger.info('All works have 0 comments, skipping all');
|
|
|
+ }
|
|
|
+
|
|
|
+ // 如果没有找到评论数信息或有未知的,处理这些作品
|
|
|
+ const indicesToProcess = allZero
|
|
|
+ ? []
|
|
|
+ : (worksWithComments.length > 0
|
|
|
+ ? worksWithComments.map(w => w.index)
|
|
|
+ : Array.from({ length: Math.min(workElements.length, 10) }, (_, i) => i));
|
|
|
+
|
|
|
+ logger.info(`Will process work indices: ${indicesToProcess.join(', ')}`);
|
|
|
+
|
|
|
+ // 遍历每个有评论的作品
|
|
|
+ for (let idx = 0; idx < indicesToProcess.length; idx++) {
|
|
|
+ const i = indicesToProcess[idx];
|
|
|
+ try {
|
|
|
+ const workInfo = workInfoList.find(w => w.index === i);
|
|
|
+ logger.info(`Processing work ${idx + 1}/${indicesToProcess.length} (index=${i}, title="${workInfo?.title}", expectedComments=${workInfo?.commentCount})...`);
|
|
|
+
|
|
|
+ // 重新打开选择作品弹窗
|
|
|
+ if (idx > 0) {
|
|
|
+ await selectBtn.click();
|
|
|
+ await page.waitForTimeout(3000);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 重新获取元素列表(因为 DOM 可能已变化)
|
|
|
+ const currentItems = await page.$$(usedSelector);
|
|
|
+ if (i < currentItems.length) {
|
|
|
+ // 滚动到元素可见
|
|
|
+ await currentItems[i].scrollIntoViewIfNeeded();
|
|
|
+ await page.waitForTimeout(500);
|
|
|
+
|
|
|
+ // 点击元素
|
|
|
+ await currentItems[i].click();
|
|
|
+ await page.waitForTimeout(4000);
|
|
|
+
|
|
|
+ // 获取评论
|
|
|
+ const title = await getCurrentWorkTitle();
|
|
|
+ const comments = await extractCommentsFromCurrentPage();
|
|
|
+
|
|
|
+ logger.info(`Work index=${i}: title="${title.slice(0, 50)}", comments=${comments.length}`);
|
|
|
+
|
|
|
+ // 检查是否已经获取过这个作品的评论
|
|
|
+ const exists = allWorkComments.some(w =>
|
|
|
+ w.videoTitle === title ||
|
|
|
+ (w.comments.length > 0 && comments.length > 0 &&
|
|
|
+ w.comments[0].content === comments[0].content)
|
|
|
+ );
|
|
|
+
|
|
|
+ if (!exists && (comments.length > 0 || title)) {
|
|
|
+ allWorkComments.push({
|
|
|
+ videoId: `video_${Date.now()}_${i}`,
|
|
|
+ videoTitle: title || workInfo?.title || `作品 ${i + 1}`,
|
|
|
+ videoCoverUrl: '',
|
|
|
+ comments,
|
|
|
+ });
|
|
|
+ logger.info(`Work index=${i}: Saved ${comments.length} comments`);
|
|
|
+ } else {
|
|
|
+ logger.info(`Work index=${i}: Skipped (duplicate or empty)`);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch (err) {
|
|
|
+ logger.warn(`Error processing work index=${i}:`, err);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ logger.warn('No work items found in modal');
|
|
|
+ }
|
|
|
+
|
|
|
+ // 按 Escape 关闭弹窗
|
|
|
+ try {
|
|
|
+ await page.keyboard.press('Escape');
|
|
|
+ await page.waitForTimeout(500);
|
|
|
+ } catch { }
|
|
|
+ } else {
|
|
|
+ logger.warn('"选择作品" button not found, only default comments will be returned');
|
|
|
+ }
|
|
|
+
|
|
|
+ await page.close();
|
|
|
+ await context.close();
|
|
|
+ await browser.close();
|
|
|
+
|
|
|
+ const totalComments = allWorkComments.reduce((sum, w) => sum + w.comments.length, 0);
|
|
|
+ logger.info(`Total: fetched ${totalComments} comments from ${allWorkComments.length} works`);
|
|
|
+
|
|
|
+ return allWorkComments;
|
|
|
+
|
|
|
+ } catch (error) {
|
|
|
+ logger.error('Error fetching Douyin comments:', error);
|
|
|
+ try {
|
|
|
+ await browser.close();
|
|
|
+ } catch { }
|
|
|
+ return allWorkComments;
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/**
 * Module-level instance of `HeadlessBrowserService`.
 *
 * The instance is constructed when this module is first evaluated and is
 * exported under a named binding, so every importer of
 * `headlessBrowserService` receives the same object.
 */
export const headlessBrowserService = new HeadlessBrowserService();
|