index.ts 60 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758
  1. import OpenAI from 'openai';
  2. import { config } from '../config/index.js';
  3. import { logger } from '../utils/logger.js';
  4. import type { ChatCompletionMessageParam, ChatCompletionTool } from 'openai/resources/chat/completions';
  5. /**
  6. * 消息角色类型
  7. */
  8. export type MessageRole = 'system' | 'user' | 'assistant' | 'tool';
  9. /**
  10. * 聊天消息接口
  11. */
  12. export interface ChatMessage {
  13. role: MessageRole;
  14. content: string;
  15. name?: string;
  16. tool_call_id?: string;
  17. }
  18. /**
  19. * 聊天补全选项
  20. */
  21. export interface ChatCompletionOptions {
  22. model?: string;
  23. messages: ChatMessage[];
  24. temperature?: number;
  25. maxTokens?: number;
  26. topP?: number;
  27. stream?: boolean;
  28. tools?: ChatCompletionTool[];
  29. toolChoice?: 'auto' | 'none' | { type: 'function'; function: { name: string } };
  30. responseFormat?: { type: 'text' | 'json_object' };
  31. stop?: string | string[];
  32. seed?: number;
  33. }
  34. /**
  35. * 流式响应回调
  36. */
  37. export type StreamCallback = (chunk: string, done: boolean) => void;
  38. /**
  39. * 嵌入向量选项
  40. */
  41. export interface EmbeddingOptions {
  42. model?: string;
  43. input: string | string[];
  44. dimensions?: number;
  45. }
  46. /**
  47. * 视觉理解选项
  48. */
  49. export interface VisionOptions {
  50. model?: string;
  51. prompt: string;
  52. imageUrl?: string;
  53. imageBase64?: string;
  54. maxTokens?: number;
  55. }
  56. /**
  57. * 函数定义
  58. */
  59. export interface FunctionDefinition {
  60. name: string;
  61. description: string;
  62. parameters: Record<string, unknown>;
  63. }
  64. /**
  65. * 工具调用结果
  66. */
  67. export interface ToolCallResult {
  68. id: string;
  69. function: {
  70. name: string;
  71. arguments: string;
  72. };
  73. }
  74. /**
  75. * AI 服务响应
  76. */
  77. export interface AIResponse {
  78. content: string;
  79. toolCalls?: ToolCallResult[];
  80. usage?: {
  81. promptTokens: number;
  82. completionTokens: number;
  83. totalTokens: number;
  84. };
  85. finishReason?: string;
  86. }
  87. /**
  88. * 登录状态分析结果
  89. */
  90. export interface LoginStatusAnalysis {
  91. isLoggedIn: boolean;
  92. hasVerification: boolean;
  93. verificationType?: 'captcha' | 'sms' | 'qrcode' | 'face' | 'slider' | 'other';
  94. verificationDescription?: string;
  95. pageDescription: string;
  96. suggestedAction?: string;
  97. }
  98. /**
  99. * 账号信息提取结果
  100. */
  101. export interface AccountInfoExtraction {
  102. found: boolean;
  103. accountName?: string;
  104. accountId?: string;
  105. avatarDescription?: string;
  106. fansCount?: string;
  107. worksCount?: string;
  108. otherInfo?: string;
  109. navigationGuide?: string;
  110. navigationSuggestion?: string;
  111. }
  112. /**
  113. * 页面操作指导结果
  114. */
  115. export interface PageOperationGuide {
  116. hasAction: boolean;
  117. actionType?: 'click' | 'input' | 'scroll' | 'wait' | 'navigate';
  118. targetDescription?: string;
  119. targetSelector?: string;
  120. targetPosition?: { x: number; y: number };
  121. inputText?: string;
  122. explanation: string;
  123. }
  124. /**
  125. * 发布状态分析结果
  126. */
  127. export interface PublishStatusAnalysis {
  128. status: 'uploading' | 'processing' | 'success' | 'failed' | 'need_captcha' | 'need_action';
  129. captchaType?: 'image' | 'sms' | 'slider' | 'other';
  130. captchaDescription?: string;
  131. errorMessage?: string;
  132. nextAction?: {
  133. actionType: 'click' | 'input' | 'wait';
  134. targetDescription: string;
  135. targetSelector?: string;
  136. };
  137. pageDescription: string;
  138. confidence: number; // 0-100 表示 AI 对判断的信心程度
  139. }
  140. /**
  141. * 阿里云百炼千问大模型 AI 服务类
  142. *
  143. * 支持功能:
  144. * - 聊天补全(Chat Completion)
  145. * - 流式输出(Streaming)
  146. * - 函数调用(Function Calling)
  147. * - 视觉理解(Vision)
  148. * - 文本嵌入(Embeddings)
  149. * - 多模型支持
  150. * - 自动重试机制
  151. */
  152. export class QwenAIService {
  153. private client: OpenAI | null = null;
  154. private models: typeof config.ai.models;
  /**
   * Stores the configured model map and, when an API key is configured,
   * creates the OpenAI-compatible client. Without a key the client stays
   * `null` and a warning is logged.
   */
  constructor() {
    this.models = config.ai.models;

    if (!config.ai.apiKey) {
      logger.warn('QwenAIService: API key not configured');
      return;
    }

    const clientOptions = {
      apiKey: config.ai.apiKey,
      baseURL: config.ai.baseUrl,
      timeout: config.ai.timeout,
      maxRetries: config.ai.maxRetries,
    };
    this.client = new OpenAI(clientOptions);
    logger.info('QwenAIService initialized', { baseUrl: config.ai.baseUrl });
  }
  /**
   * Reports whether the AI client has been created.
   *
   * @returns `true` when a client exists, `false` otherwise.
   */
  isAvailable(): boolean {
    return Boolean(this.client);
  }
  /**
   * Returns the model map captured from configuration at construction time.
   */
  getAvailableModels(): typeof config.ai.models {
    const { models } = this;
    return models;
  }
  /**
   * Guard used before every API call.
   *
   * @throws Error when no client was created.
   */
  private ensureAvailable(): void {
    if (this.client) {
      return;
    }
    throw new Error('AI service not configured. Please set DASHSCOPE_API_KEY environment variable.');
  }
  189. // ==================== 核心 API 方法 ====================
  /**
   * Chat completion — the base request method.
   *
   * Applies defaults (configured chat model, temperature 0.7, maxTokens 2000,
   * topP 0.9), logs the request and response, and maps the first choice into
   * an {@link AIResponse}.
   *
   * @param options Chat options.
   * @returns The normalised AI response.
   * @throws Error when the service is not configured; API failures are
   *   rethrown after being passed through `this.handleError`.
   */
  async chatCompletion(options: ChatCompletionOptions): Promise<AIResponse> {
    this.ensureAvailable();

    const {
      model = this.models.chat,
      messages,
      temperature = 0.7,
      maxTokens = 2000,
      topP = 0.9,
      tools,
      toolChoice,
      responseFormat,
      stop,
      seed,
    } = options;

    // OpenAI-compatible endpoints reject an empty `tools` array, and
    // `tool_choice` is only accepted together with tools, so drop both
    // when there is nothing to offer.
    const activeTools = tools && tools.length > 0 ? tools : undefined;
    const activeToolChoice = activeTools ? toolChoice : undefined;

    const startTime = Date.now();
    // slice(2, 11) takes the same 9 characters the deprecated substr(2, 9) did.
    const requestId = `chat_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;

    logger.info(`[AI] ========== Chat Completion Request ==========`);
    logger.info(`[AI] Request ID: ${requestId}`);
    logger.info(`[AI] Model: ${model}`);
    logger.info(`[AI] Messages: ${messages.length} 条`);
    logger.info(`[AI] Temperature: ${temperature}, MaxTokens: ${maxTokens}`);
    if (activeTools) logger.info(`[AI] Tools: ${activeTools.length} 个函数`);
    if (responseFormat) logger.info(`[AI] Response Format: ${responseFormat.type}`);

    try {
      // ensureAvailable() above guarantees the client exists.
      const response = await this.client!.chat.completions.create({
        model,
        messages: messages as ChatCompletionMessageParam[],
        temperature,
        max_tokens: maxTokens,
        top_p: topP,
        tools: activeTools,
        tool_choice: activeToolChoice,
        response_format: responseFormat,
        stop,
        seed,
      });

      const duration = Date.now() - startTime;
      const choice = response.choices[0];
      const content = choice?.message?.content ?? '';

      logger.info(`[AI] ========== Chat Completion Response ==========`);
      logger.info(`[AI] Request ID: ${requestId}`);
      logger.info(`[AI] Duration: ${duration}ms`);
      logger.info(`[AI] Finish Reason: ${choice?.finish_reason}`);
      if (response.usage) {
        logger.info(`[AI] Tokens - Prompt: ${response.usage.prompt_tokens}, Completion: ${response.usage.completion_tokens}, Total: ${response.usage.total_tokens}`);
      }
      logger.info(`[AI] Response Length: ${content.length} 字符`);
      logger.info(`[AI] ==============================================`);

      return {
        content,
        toolCalls: choice?.message?.tool_calls?.map(tc => ({
          id: tc.id,
          function: {
            name: tc.function.name,
            arguments: tc.function.arguments,
          },
        })),
        usage: response.usage
          ? {
              promptTokens: response.usage.prompt_tokens,
              completionTokens: response.usage.completion_tokens,
              totalTokens: response.usage.total_tokens,
            }
          : undefined,
        finishReason: choice?.finish_reason ?? undefined,
      };
    } catch (error) {
      const duration = Date.now() - startTime;
      logger.error(`[AI] ========== Chat Completion Error ==========`);
      logger.error(`[AI] Request ID: ${requestId}`);
      logger.error(`[AI] Duration: ${duration}ms`);
      logger.error(`[AI] Error:`, error);
      logger.error(`[AI] ============================================`);
      throw this.handleError(error);
    }
  }
  /**
   * Streaming chat completion.
   *
   * Only `model`, `messages`, `temperature`, `maxTokens`, `topP` and `stop`
   * are read from `options`. Each non-empty text delta is passed to
   * `callback` with `done = false`; after the stream ends the callback is
   * invoked once more with an empty chunk and `done = true`.
   *
   * @param options Chat options.
   * @param callback Streaming callback.
   * @returns The full concatenated response text.
   * @throws Error when the service is not configured; API failures are
   *   rethrown after being passed through `this.handleError`.
   */
  async chatCompletionStream(
    options: Omit<ChatCompletionOptions, 'stream'>,
    callback: StreamCallback
  ): Promise<string> {
    this.ensureAvailable();

    const {
      model = this.models.chat,
      messages,
      temperature = 0.7,
      maxTokens = 2000,
      topP = 0.9,
      stop,
    } = options;

    const startedAt = Date.now();
    const requestId = `stream_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;

    const requestLines = [
      `[AI] ========== Stream Chat Request ==========`,
      `[AI] Request ID: ${requestId}`,
      `[AI] Model: ${model}`,
      `[AI] Messages: ${messages.length} 条`,
      `[AI] Temperature: ${temperature}, MaxTokens: ${maxTokens}`,
    ];
    for (const line of requestLines) {
      logger.info(line);
    }

    try {
      const stream = await this.client!.chat.completions.create({
        model,
        messages: messages as ChatCompletionMessageParam[],
        temperature,
        max_tokens: maxTokens,
        top_p: topP,
        stop,
        stream: true,
      });

      const pieces: string[] = [];
      for await (const part of stream) {
        const piece = part.choices[0]?.delta?.content || '';
        if (!piece) {
          continue;
        }
        pieces.push(piece);
        callback(piece, false);
      }
      callback('', true);

      const fullContent = pieces.join('');
      const elapsed = Date.now() - startedAt;
      const responseLines = [
        `[AI] ========== Stream Chat Response ==========`,
        `[AI] Request ID: ${requestId}`,
        `[AI] Duration: ${elapsed}ms`,
        `[AI] Response Length: ${fullContent.length} 字符`,
        `[AI] =============================================`,
      ];
      for (const line of responseLines) {
        logger.info(line);
      }
      return fullContent;
    } catch (error) {
      const elapsed = Date.now() - startedAt;
      logger.error(`[AI] ========== Stream Chat Error ==========`);
      logger.error(`[AI] Request ID: ${requestId}`);
      logger.error(`[AI] Duration: ${elapsed}ms`);
      logger.error(`[AI] Error:`, error);
      logger.error(`[AI] =========================================`);
      throw this.handleError(error);
    }
  }
  /**
   * Simple one-shot conversation helper.
   *
   * @param prompt User prompt.
   * @param systemPrompt Optional system prompt, sent before the user message.
   * @param model Optional model; falls back to the configured chat model.
   * @returns The text content of the AI reply.
   */
  async chat(prompt: string, systemPrompt?: string, model?: string): Promise<string> {
    const userMessage: ChatMessage = { role: 'user', content: prompt };
    const messages: ChatMessage[] = systemPrompt
      ? [{ role: 'system', content: systemPrompt }, userMessage]
      : [userMessage];

    const { content } = await this.chatCompletion({
      model: model || this.models.chat,
      messages,
    });
    return content;
  }
  /**
   * Quick conversation: same as `chat`, but routed to the configured fast model.
   */
  async quickChat(prompt: string, systemPrompt?: string): Promise<string> {
    const { fast } = this.models;
    return this.chat(prompt, systemPrompt, fast);
  }
  /**
   * Reasoning conversation: same as `chat`, but routed to the configured
   * reasoning model (intended for complex logic problems).
   */
  async reasoningChat(prompt: string, systemPrompt?: string): Promise<string> {
    const { reasoning } = this.models;
    return this.chat(prompt, systemPrompt, reasoning);
  }
  /**
   * Code generation / analysis: same as `chat`, but routed to the configured
   * coder model. A default programming-assistant system prompt is used when
   * none (or an empty one) is supplied.
   */
  async codeChat(prompt: string, systemPrompt?: string): Promise<string> {
    let effectiveSystemPrompt = '你是一个专业的编程助手,擅长代码编写、分析和调试。请提供清晰、高效的代码解决方案。';
    if (systemPrompt) {
      effectiveSystemPrompt = systemPrompt;
    }
    return this.chat(prompt, effectiveSystemPrompt, this.models.coder);
  }
  368. // ==================== 函数调用 ====================
  /**
   * Conversation with function calling enabled.
   *
   * @param messages Message list.
   * @param functions Function definitions, converted into `function` tools.
   * @param toolChoice `'auto'`, `'none'`, or the name of a function to force.
   * @returns The normalised AI response from `chatCompletion`.
   */
  async chatWithFunctions(
    messages: ChatMessage[],
    functions: FunctionDefinition[],
    toolChoice: 'auto' | 'none' | string = 'auto'
  ): Promise<AIResponse> {
    const tools: ChatCompletionTool[] = functions.map(({ name, description, parameters }) => ({
      type: 'function' as const,
      function: { name, description, parameters },
    }));

    // Any value other than the two keywords is treated as a function name.
    let choice: ChatCompletionOptions['toolChoice'];
    if (toolChoice === 'auto' || toolChoice === 'none') {
      choice = toolChoice;
    } else {
      choice = { type: 'function', function: { name: toolChoice } };
    }

    return this.chatCompletion({ messages, tools, toolChoice: choice });
  }
  /**
   * Runs the function-calling loop: asks the model, executes every requested
   * tool call, feeds the results back, and repeats until the model answers
   * without requesting tools.
   *
   * The caller's `messages` array is copied, not mutated.
   *
   * @param messages Initial messages.
   * @param functions Function definitions offered to the model.
   * @param functionExecutor Executes one function and returns its result text.
   *   Errors it throws are reported back to the model as `Error: ...` text.
   * @param maxIterations Maximum number of model round-trips.
   * @returns The content of the first response that contains no tool calls.
   * @throws Error when `maxIterations` is exhausted.
   */
  async runFunctionLoop(
    messages: ChatMessage[],
    functions: FunctionDefinition[],
    functionExecutor: (name: string, args: Record<string, unknown>) => Promise<string>,
    maxIterations: number = 10
  ): Promise<string> {
    const conversationMessages: ChatMessage[] = [...messages];

    for (let i = 0; i < maxIterations; i++) {
      const response = await this.chatWithFunctions(conversationMessages, functions);
      const toolCalls = response.toolCalls ?? [];

      if (toolCalls.length === 0) {
        return response.content;
      }

      // The assistant turn must carry the tool calls it requested: the API
      // only accepts `tool` messages that answer a `tool_calls` entry of a
      // preceding assistant message.
      const assistantMessage: ChatMessage & {
        tool_calls: Array<ToolCallResult & { type: 'function' }>;
      } = {
        role: 'assistant',
        content: response.content || '',
        tool_calls: toolCalls.map(call => ({ ...call, type: 'function' as const })),
      };
      conversationMessages.push(assistantMessage);

      // Execute each tool call and append its result (or error text).
      for (const toolCall of toolCalls) {
        let content: string;
        try {
          // An empty argument string means "no arguments"; JSON.parse('') would throw.
          const rawArgs = toolCall.function.arguments.trim();
          const args: Record<string, unknown> = rawArgs ? JSON.parse(rawArgs) : {};
          content = await functionExecutor(toolCall.function.name, args);
        } catch (error) {
          content = `Error: ${error instanceof Error ? error.message : 'Unknown error'}`;
        }
        conversationMessages.push({
          role: 'tool',
          content,
          tool_call_id: toolCall.id,
        });
      }
    }

    throw new Error('Function loop exceeded maximum iterations');
  }
  442. // ==================== 视觉理解 ====================
  /**
   * Image understanding.
   *
   * Sends one user message containing the image followed by the text prompt.
   * `imageUrl` is preferred when both sources are given. `imageBase64` may be
   * raw base64 (wrapped as a JPEG data URL) or an already complete `data:` URL.
   *
   * @param options Vision options; model defaults to the configured vision
   *   model and `maxTokens` to 1000.
   * @returns The text content of the first choice (empty string if none).
   * @throws Error when neither image source is provided or the service is not
   *   configured; API failures are rethrown via `this.handleError`.
   */
  async analyzeImage(options: VisionOptions): Promise<string> {
    this.ensureAvailable();

    const { model = this.models.vision, prompt, imageUrl, imageBase64, maxTokens = 1000 } = options;

    if (!imageUrl && !imageBase64) {
      throw new Error('Either imageUrl or imageBase64 must be provided');
    }

    let resolvedUrl: string;
    if (imageUrl) {
      resolvedUrl = imageUrl;
    } else {
      const encoded = imageBase64 ?? '';
      // Do not prepend a second data-URL header when the caller already
      // passed a complete `data:` URL.
      resolvedUrl = /^data:/i.test(encoded) ? encoded : `data:image/jpeg;base64,${encoded}`;
    }
    const imageContent = { type: 'image_url' as const, image_url: { url: resolvedUrl } };

    const startTime = Date.now();
    // slice(2, 11) takes the same 9 characters the deprecated substr(2, 9) did.
    const requestId = `vision_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
    // Size of the base64 text in KB, for logging only.
    const imageSize = imageBase64 ? Math.round(imageBase64.length / 1024) : 0;

    logger.info(`[AI] ========== Vision Analysis Request ==========`);
    logger.info(`[AI] Request ID: ${requestId}`);
    logger.info(`[AI] Model: ${model}`);
    logger.info(`[AI] Image: ${imageUrl ? 'URL' : `Base64 (${imageSize}KB)`}`);
    logger.info(`[AI] Prompt: ${prompt.substring(0, 100)}${prompt.length > 100 ? '...' : ''}`);
    logger.info(`[AI] MaxTokens: ${maxTokens}`);

    try {
      // ensureAvailable() above guarantees the client exists.
      const response = await this.client!.chat.completions.create({
        model,
        messages: [
          {
            role: 'user',
            content: [
              imageContent,
              { type: 'text', text: prompt },
            ],
          },
        ],
        max_tokens: maxTokens,
      });

      const duration = Date.now() - startTime;
      const content = response.choices[0]?.message?.content ?? '';

      logger.info(`[AI] ========== Vision Analysis Response ==========`);
      logger.info(`[AI] Request ID: ${requestId}`);
      logger.info(`[AI] Duration: ${duration}ms`);
      logger.info(`[AI] Finish Reason: ${response.choices[0]?.finish_reason}`);
      if (response.usage) {
        logger.info(`[AI] Tokens - Prompt: ${response.usage.prompt_tokens}, Completion: ${response.usage.completion_tokens}, Total: ${response.usage.total_tokens}`);
      }
      logger.info(`[AI] Response Length: ${content.length} 字符`);
      logger.info(`[AI] Response Preview: ${content.substring(0, 150)}${content.length > 150 ? '...' : ''}`);
      logger.info(`[AI] ================================================`);

      return content;
    } catch (error) {
      const duration = Date.now() - startTime;
      logger.error(`[AI] ========== Vision Analysis Error ==========`);
      logger.error(`[AI] Request ID: ${requestId}`);
      logger.error(`[AI] Duration: ${duration}ms`);
      logger.error(`[AI] Error:`, error);
      logger.error(`[AI] =============================================`);
      throw this.handleError(error);
    }
  }
  502. // ==================== 文本嵌入 ====================
  /**
   * Generates embedding vectors for one or more texts.
   *
   * @param options Embedding options; model defaults to the configured
   *   embedding model.
   * @returns One vector per item in the API response, in response order.
   * @throws Error when the service is not configured; API failures are
   *   rethrown via `this.handleError`.
   */
  async createEmbedding(options: EmbeddingOptions): Promise<number[][]> {
    this.ensureAvailable();

    const { model = this.models.embedding, input, dimensions } = options;

    const startedAt = Date.now();
    const requestId = `embed_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
    const inputCount = Array.isArray(input) ? input.length : 1;

    const requestLines = [
      `[AI] ========== Embedding Request ==========`,
      `[AI] Request ID: ${requestId}`,
      `[AI] Model: ${model}`,
      `[AI] Input Count: ${inputCount}`,
    ];
    if (dimensions) {
      requestLines.push(`[AI] Dimensions: ${dimensions}`);
    }
    for (const line of requestLines) {
      logger.info(line);
    }

    try {
      const response = await this.client!.embeddings.create({ model, input, dimensions });

      const elapsed = Date.now() - startedAt;
      const responseLines = [
        `[AI] ========== Embedding Response ==========`,
        `[AI] Request ID: ${requestId}`,
        `[AI] Duration: ${elapsed}ms`,
        `[AI] Vectors: ${response.data.length}`,
        `[AI] Vector Dimension: ${response.data[0]?.embedding?.length || 0}`,
        `[AI] ==========================================`,
      ];
      for (const line of responseLines) {
        logger.info(line);
      }

      return response.data.map(({ embedding }) => embedding);
    } catch (error) {
      const elapsed = Date.now() - startedAt;
      logger.error(`[AI] ========== Embedding Error ==========`);
      logger.error(`[AI] Request ID: ${requestId}`);
      logger.error(`[AI] Duration: ${elapsed}ms`);
      logger.error(`[AI] Error:`, error);
      logger.error(`[AI] ======================================`);
      throw this.handleError(error);
    }
  }
  /**
   * Computes the similarity of two texts by embedding both in one request
   * and comparing the two vectors with `this.cosineSimilarity`.
   *
   * @param text1 First text.
   * @param text2 Second text.
   * @returns Similarity score. The original documentation states a 0-1
   *   range; `cosineSimilarity` is not visible here — TODO confirm.
   */
  async calculateSimilarity(text1: string, text2: string): Promise<number> {
    const [firstVector, secondVector] = await this.createEmbedding({ input: [text1, text2] });
    return this.cosineSimilarity(firstVector, secondVector);
  }
  552. // ==================== JSON 结构化输出 ====================
  /**
   * Generates a structured JSON response.
   *
   * The request uses `json_object` response format with temperature 0.3.
   * The schema description is always included in the system message, even
   * when a custom `systemPrompt` is given: JSON mode needs the prompt to ask
   * for JSON explicitly, and otherwise the schema would never reach the model.
   *
   * @param prompt User prompt.
   * @param schema Description of the expected JSON shape.
   * @param systemPrompt Optional role/system prompt; the format instruction
   *   is appended to it.
   * @returns The parsed JSON, cast to `T` without validation.
   * @throws Error('Failed to parse JSON response') when no JSON can be
   *   parsed from the reply; request errors propagate from `chatCompletion`.
   */
  async generateJSON<T = unknown>(
    prompt: string,
    schema: string,
    systemPrompt?: string
  ): Promise<T> {
    const formatInstruction = `请严格按照用户要求的JSON格式输出,不要添加任何额外的说明文字。\n输出格式要求:${schema}`;
    const systemMessage = systemPrompt
      ? `${systemPrompt}\n${formatInstruction}`
      : `你是一个数据处理助手。${formatInstruction}`;

    const response = await this.chatCompletion({
      messages: [
        { role: 'system', content: systemMessage },
        { role: 'user', content: prompt },
      ],
      responseFormat: { type: 'json_object' },
      temperature: 0.3,
    });

    const raw = response.content;
    try {
      return JSON.parse(raw) as T;
    } catch {
      // Fall through: try to extract an embedded JSON object or array.
    }

    const jsonMatch = raw.match(/\{[\s\S]*\}|\[[\s\S]*\]/);
    if (jsonMatch) {
      try {
        return JSON.parse(jsonMatch[0]) as T;
      } catch {
        // Report the documented error below instead of a raw SyntaxError.
      }
    }
    throw new Error('Failed to parse JSON response');
  }
  586. // ==================== 业务场景方法 ====================
  /**
   * Generates up to five video title suggestions.
   *
   * The reply is split into lines, leading list numbering is stripped, and
   * titles that are empty or longer than `maxLength` (default 50) are dropped.
   *
   * @param params Content description, target platform and optional length limit.
   * @returns At most five titles.
   */
  async generateTitle(params: {
    description: string;
    platform: string;
    maxLength?: number;
  }): Promise<string[]> {
    const { description, platform, maxLength = 50 } = params;

    const userPrompt = `请根据以下视频内容描述,生成5个不同风格的标题:\n${description}`;
    const systemPrompt = `你是一个专业的自媒体运营专家,擅长为${platform}平台创作吸引人的标题。标题长度不超过${maxLength}个字符。每个标题独占一行,不要添加序号。`;
    const reply = await this.chat(userPrompt, systemPrompt);

    const titles: string[] = [];
    for (const line of reply.split('\n')) {
      if (!line.trim()) {
        continue;
      }
      const candidate = line.replace(/^\d+[\.\、\)]\s*/, '').trim();
      if (candidate.length > 0 && candidate.length <= maxLength) {
        titles.push(candidate);
      }
    }
    return titles.slice(0, 5);
  }
  /**
   * Generates tag recommendations.
   *
   * The reply is split on ASCII commas, full-width commas (U+FF0C),
   * enumeration commas (U+3001) and newlines; a leading `#` (ASCII or
   * full-width) is removed from each tag.
   *
   * @param params Title, optional description, platform and `maxTags`
   *   (default 5).
   * @returns At most `maxTags` non-empty tags.
   */
  async generateTags(params: {
    title: string;
    description?: string;
    platform: string;
    maxTags?: number;
  }): Promise<string[]> {
    const { title, description, platform, maxTags = 5 } = params;

    const response = await this.chat(
      `请根据以下视频信息推荐${maxTags}个相关标签:\n标题:${title}\n${description ? `描述:${description}` : ''}`,
      `你是一个专业的自媒体运营专家,擅长为${platform}平台选择热门标签。每个标签不要带#号,用逗号分隔,只输出标签不要其他内容。`
    );

    // Written with escapes so the full-width characters survive copy/paste
    // and normalisation; the previous class listed the ASCII comma twice.
    const tags = response
      .split(/[,\uFF0C\u3001\n]/)
      .map(tag => tag.trim().replace(/^[#\uFF03]/, '').trim())
      .filter(tag => tag.length > 0);

    return tags.slice(0, maxTags);
  }
  /**
   * Optimises a content description for a platform.
   *
   * @param params Original text, platform and `maxLength` (default 500).
   * @returns The optimised description, truncated to at most `maxLength`
   *   characters (counted as Unicode code points).
   */
  async optimizeDescription(params: {
    original: string;
    platform: string;
    maxLength?: number;
  }): Promise<string> {
    const { original, platform, maxLength = 500 } = params;

    const response = await this.chat(
      `请优化以下视频描述:\n${original}`,
      `你是一个专业的自媒体运营专家,擅长为${platform}平台优化视频描述。优化后的描述要吸引人、有互动性,长度不超过${maxLength}个字符。直接输出优化后的描述,不要添加其他说明。`
    );

    // Truncate by code point: slicing the string directly counts UTF-16
    // units and can cut an emoji's surrogate pair in half.
    return Array.from(response).slice(0, maxLength).join('');
  }
  /**
   * Generates up to three reply suggestions for a comment.
   *
   * @param params Comment text, author name, optional video context and tone
   *   (default `'friendly'`).
   * @returns At most three non-empty reply lines with leading list numbering
   *   stripped.
   */
  async generateReply(params: {
    comment: string;
    authorName: string;
    context?: string;
    tone?: 'friendly' | 'professional' | 'humorous';
  }): Promise<string[]> {
    const { comment, authorName, context, tone = 'friendly' } = params;

    const toneDesc = {
      friendly: '友好亲切',
      professional: '专业正式',
      humorous: '幽默风趣',
    };

    const userPrompt = `粉丝"${authorName}"的评论:${comment}\n${context ? `视频内容:${context}` : ''}\n请生成3个不同的回复选项,每个回复独占一行。`;
    const systemPrompt = `你是一个自媒体账号运营人员,需要用${toneDesc[tone]}的语气回复粉丝评论。回复要简洁、有互动性,表达对粉丝的感谢。`;
    const reply = await this.chat(userPrompt, systemPrompt);

    const replies: string[] = [];
    for (const line of reply.split('\n')) {
      if (!line.trim()) {
        continue;
      }
      const cleaned = line.replace(/^\d+[\.\、\)]\s*/, '').trim();
      if (cleaned.length > 0) {
        replies.push(cleaned);
      }
    }
    return replies.slice(0, 3);
  }
  /**
   * Recommends the best publish times.
   *
   * The model is asked for a JSON object wrapping a `recommendations` array,
   * because `json_object` mode produces an object rather than a bare array.
   * A bare array reply is still accepted. Entries without string `time` and
   * `reason` fields are dropped.
   *
   * @param params Platform, content type and optional target audience.
   * @returns The recommended times (possibly empty when the reply has an
   *   unexpected shape), or three built-in defaults when the request fails.
   */
  async recommendPublishTime(params: {
    platform: string;
    contentType: string;
    targetAudience?: string;
  }): Promise<{ time: string; reason: string }[]> {
    const { platform, contentType, targetAudience } = params;

    try {
      // The format requirement is stated in the user prompt as well, so the
      // request carries the "JSON" keyword that JSON mode requires.
      const result = await this.generateJSON<unknown>(
        `请为${platform}平台的${contentType}类型内容推荐3个最佳发布时间。${targetAudience ? `目标受众:${targetAudience}` : ''}\n请以JSON对象返回,格式为{"recommendations":[{"time":"HH:mm","reason":"..."}]}`,
        '返回JSON对象,包含recommendations数组,每项包含time(HH:mm格式)和reason字段',
        '你是一个数据分析专家,熟悉各自媒体平台的用户活跃规律。'
      );

      let items: unknown[] = [];
      if (Array.isArray(result)) {
        items = result;
      } else if (typeof result === 'object' && result !== null) {
        const wrapped = (result as Record<string, unknown>).recommendations;
        if (Array.isArray(wrapped)) {
          items = wrapped;
        }
      }

      const recommendations: { time: string; reason: string }[] = [];
      for (const item of items) {
        if (typeof item !== 'object' || item === null) {
          continue;
        }
        const { time, reason } = item as Record<string, unknown>;
        if (typeof time === 'string' && typeof reason === 'string') {
          recommendations.push({ time, reason });
        }
      }
      return recommendations;
    } catch (error) {
      logger.warn('recommendPublishTime failed, using default times:', error);
      return [
        { time: '12:00', reason: '午休时间,用户活跃度高' },
        { time: '18:00', reason: '下班时间,通勤路上刷手机' },
        { time: '21:00', reason: '晚间黄金时段,用户放松娱乐' },
      ];
    }
  }
  /**
   * Content moderation.
   *
   * Asks the model for a JSON verdict and normalises it: `safe` is `true`
   * only when the model returned the boolean `true`, `categories` keeps only
   * string entries, and `suggestion` falls back to an empty string.
   *
   * NOTE(review): when the request fails this method fails open (reports the
   * content as safe), as the original did. The failure is now logged;
   * confirm that fail-open is the intended policy.
   *
   * @param content Text to review.
   * @returns The moderation verdict.
   */
  async moderateContent(content: string): Promise<{
    safe: boolean;
    categories: string[];
    suggestion: string;
  }> {
    try {
      // The user prompt asks for JSON explicitly so the request carries the
      // "JSON" keyword that JSON mode requires.
      const result = await this.generateJSON<Record<string, unknown> | null>(
        `请审核以下内容是否合规,并以JSON对象返回审核结果:\n${content}`,
        '返回JSON对象,包含safe(布尔值)、categories(问题类别数组,如空则为[])、suggestion(修改建议)字段',
        '你是一个内容审核专家,需要检查内容是否包含:违法违规、色情低俗、暴力血腥、政治敏感、虚假信息等问题。'
      );

      const verdict = typeof result === 'object' && result !== null ? result : {};
      const categories = Array.isArray(verdict.categories)
        ? verdict.categories.filter((entry): entry is string => typeof entry === 'string')
        : [];

      return {
        safe: verdict.safe === true,
        categories,
        suggestion: typeof verdict.suggestion === 'string' ? verdict.suggestion : '',
      };
    } catch (error) {
      logger.warn('moderateContent failed, treating content as safe:', error);
      return { safe: true, categories: [], suggestion: '' };
    }
  }
  /**
   * Generates a summary with the fast model.
   *
   * @param content Text to summarise.
   * @param maxLength Length limit stated in the prompt (default 200); the
   *   reply itself is not truncated.
   * @returns The model's summary text.
   */
  async summarize(content: string, maxLength: number = 200): Promise<string> {
    const userPrompt = `请将以下内容总结为不超过${maxLength}字的摘要:\n${content}`;
    const systemPrompt = '你是一个专业的文字编辑,擅长提炼核心内容。直接输出摘要,不要添加任何前缀。';
    return this.quickChat(userPrompt, systemPrompt);
  }
  /**
   * Translates text.
   *
   * @param text Text to translate.
   * @param targetLang Target language name used in the prompt (default `'英文'`).
   * @param sourceLang Optional source language name; mentioned in the prompt
   *   only when provided.
   * @returns The model's translation.
   */
  async translate(
    text: string,
    targetLang: string = '英文',
    sourceLang?: string
  ): Promise<string> {
    let prompt: string;
    if (sourceLang) {
      prompt = `请将以下${sourceLang}文本翻译成${targetLang}:\n${text}`;
    } else {
      prompt = `请将以下文本翻译成${targetLang}:\n${text}`;
    }

    const systemPrompt = '你是一个专业翻译,请提供准确、自然的翻译。直接输出翻译结果。';
    return this.chat(prompt, systemPrompt);
  }
  /**
   * Extracts keywords from text.
   *
   * The reply is split on ASCII commas, full-width commas (U+FF0C),
   * enumeration commas (U+3001) and newlines.
   *
   * @param text Text to analyse.
   * @param count Number of keywords to request and the maximum returned
   *   (default 5).
   * @returns At most `count` non-empty keywords.
   */
  async extractKeywords(text: string, count: number = 5): Promise<string[]> {
    const response = await this.chat(
      `请从以下文本中提取${count}个关键词,用逗号分隔:\n${text}`,
      '你是一个文本分析专家。只输出关键词,不要其他内容。'
    );

    // Written with escapes so the full-width characters survive copy/paste
    // and normalisation; the previous class listed the ASCII comma twice.
    return response
      .split(/[,\uFF0C\u3001\n]/)
      .map(keyword => keyword.trim())
      .filter(keyword => keyword.length > 0)
      .slice(0, count);
  }
  742. // ==================== 登录页面分析 ====================
  /**
   * Analyses the state of a login page from a screenshot.
   *
   * The vision model is asked for a JSON verdict, which is then normalised:
   * boolean fields accept `true` or the string `"true"`, text fields treat
   * empty values and the literal `"null"` as absent, and an unrecognised
   * verification type is reported as `'other'`.
   *
   * This method never throws: when no JSON can be parsed the raw reply is
   * returned as `pageDescription`, and when the request fails a
   * not-logged-in result with `pageDescription` `'分析失败'` is returned.
   *
   * @param imageBase64 Base64-encoded page screenshot.
   * @param platform Platform name.
   * @returns The login status analysis.
   */
  async analyzeLoginStatus(imageBase64: string, platform: string): Promise<LoginStatusAnalysis> {
    const prompt = `请分析这张${platform}平台的网页截图,判断以下内容:
1. 用户是否已经登录成功?(判断依据:是否能看到用户头像、用户名、个人中心入口、创作者后台等已登录状态的元素)
2. 页面上是否有验证码或其他二次验证?(如:图形验证码、滑块验证、短信验证码输入框、扫码验证、人脸识别提示等)
3. 如果有验证,是什么类型的验证?
4. 简要描述当前页面的状态
请严格按照以下JSON格式返回:
{
"isLoggedIn": true或false,
"hasVerification": true或false,
"verificationType": "captcha"或"sms"或"qrcode"或"face"或"slider"或"other"或null,
"verificationDescription": "验证的具体描述,如果没有验证则为null",
"pageDescription": "当前页面状态的简要描述",
"suggestedAction": "建议用户进行的操作,如果不需要则为null"
}`;

    // Boolean("false") is true, so string booleans must be compared explicitly.
    const toFlag = (value: unknown): boolean =>
      value === true || (typeof value === 'string' && value.trim().toLowerCase() === 'true');
    // Treat empty strings and the literal "null" as "not provided".
    const toText = (value: unknown): string | undefined => {
      if (typeof value !== 'string') {
        return undefined;
      }
      const trimmed = value.trim();
      return trimmed === '' || trimmed.toLowerCase() === 'null' ? undefined : value;
    };
    const verificationTypes = ['captcha', 'sms', 'qrcode', 'face', 'slider', 'other'] as const;

    try {
      const response = await this.analyzeImage({
        imageBase64,
        prompt,
        maxTokens: 500,
      });

      // Try to parse the JSON object embedded in the reply.
      const jsonMatch = response.match(/\{[\s\S]*\}/);
      let result: Record<string, unknown> | undefined;
      if (jsonMatch) {
        try {
          result = JSON.parse(jsonMatch[0]) as Record<string, unknown>;
        } catch {
          // Malformed JSON: fall back to the raw reply below.
        }
      }

      if (result) {
        const rawType = toText(result.verificationType);
        const verificationType =
          rawType === undefined
            ? undefined
            : verificationTypes.find(known => known === rawType) ?? 'other';

        return {
          isLoggedIn: toFlag(result.isLoggedIn),
          hasVerification: toFlag(result.hasVerification),
          verificationType,
          verificationDescription: toText(result.verificationDescription),
          pageDescription: toText(result.pageDescription) ?? '无法解析页面状态',
          suggestedAction: toText(result.suggestedAction),
        };
      }

      // No parseable JSON: return defaults and keep the model's text.
      return {
        isLoggedIn: false,
        hasVerification: false,
        pageDescription: response,
      };
    } catch (error) {
      logger.error('analyzeLoginStatus error:', error);
      return {
        isLoggedIn: false,
        hasVerification: false,
        pageDescription: '分析失败',
      };
    }
  }
  /**
   * Extract account information (name, ID, follower/works counts, ...) from a
   * page screenshot.
   *
   * The first `{...}` span of the model reply is parsed as JSON. If strict
   * parsing fails, a light repair pass (single quotes to double quotes,
   * trailing commas removed) is applied and parsing is retried once. When no
   * usable JSON is obtained the raw reply is returned as `navigationGuide`
   * with `found: false`. Request failures are logged and reported as
   * `found: false` rather than thrown.
   *
   * A numeric `0` for `fansCount` / `worksCount` is preserved; other falsy
   * values (null, '', missing) become `undefined`.
   *
   * @param imageBase64 Base64-encoded page screenshot.
   * @param platform Platform key; also selects an optional platform-specific hint block.
   * @returns Account information extraction result.
   */
  async extractAccountInfo(imageBase64: string, platform: string): Promise<AccountInfoExtraction> {
    // Platform-specific hints appended to the generic prompt.
    const platformHints: Record<string, string> = {
      baijiahao: `
百家号平台常见的账号信息位置:
- 页面左上角或侧边栏可能显示作者名称和头像
- 侧边栏"首页"或"个人中心"菜单可以进入账号信息页面
- 头部右上角可能有用户头像和下拉菜单
- 账号设置页面会显示完整的账号信息`,
      douyin: `
抖音平台常见的账号信息位置:
- 页面顶部或右上角显示用户头像和昵称
- 点击头像可以进入个人主页
- 侧边栏可能有"我的"或"个人中心"入口`,
      xiaohongshu: `
小红书平台常见的账号信息位置和格式:
- 页面右上角显示账号名称(昵称),通常在头像旁边
- 账号名称附近或下方会显示"小红书号:xxxxxxx"或"小红书号:xxxxxxx",这是重要的账号ID
- 小红书号通常是一串字母数字组合,如"ABC123456"
- "粉丝 X" 或 "粉丝 X/500" 表示粉丝数量
- 侧边栏有"笔记管理"、"数据看板"等入口
- 如果看到类似ID格式的字符串(字母+数字组合),很可能就是小红书号`,
      kuaishou: `
快手平台常见的账号信息位置:
- 页面顶部显示用户信息
- 侧边栏有创作者中心入口`,
      weixin_video: `
视频号平台常见的账号信息位置和格式:
- 页面顶部显示账号名称(如"轻尘网络")
- 账号名称下方通常显示"视频号ID:xxxxxxx",这个ID是重要信息,请提取冒号后面的字符串
- "视频 X" 表示作品数量(X是数字)→ 填入 worksCount
- "关注者 X" 表示粉丝数量(X是数字)→ 填入 fansCount(注意:视频号用"关注者"表示粉丝)
- 侧边栏有账号设置入口
- 头像通常是圆形图片,显示在账号名称左侧`,
    };

    // Own-property check so that a platform string such as "constructor" cannot
    // pull an inherited Object.prototype member into the prompt.
    const platformHint = Object.prototype.hasOwnProperty.call(platformHints, platform)
      ? platformHints[platform] || ''
      : '';

    const prompt = `请仔细分析这张${platform}平台的网页截图,尝试提取以下账号信息:
1. 账号名称/昵称(这是最重要的信息,请仔细查找页面上的用户名、作者名、昵称等)
2. 账号ID(如果可见)
3. 头像描述(如果可见)
4. 粉丝数量(如果可见)
5. 作品数量(如果可见)
6. 其他相关信息
${platformHint}
【重要提示 - 请务必仔细阅读】:
- 账号名称可能显示在页面顶部、侧边栏、头像旁边等位置
- 如果看到任何类似用户名或昵称的文字,请提取出来
- 即使只找到账号名称,也请返回 found: true
【关于数据统计 - 请特别注意区分】:
- "粉丝"/"粉丝数"/"followers"/"关注者" = 关注该账号的人数 → 填入 fansCount
- "关注"/"关注数"/"following" = 该账号关注的人数 → 这是关注数,不要填入任何字段
- "作品"/"作品数"/"视频"/"笔记"/"文章"/"posts"/"videos" = 该账号发布的内容数量 → 填入 worksCount
- "获赞"/"点赞"/"likes" = 获得的点赞数 → 不要填入作品数
【各平台特殊说明】:
- 视频号:页面上"关注者 X"中的X是粉丝数,"视频 X"中的X是作品数,"视频号ID:xxx"中冒号后的xxx是账号ID
- 抖音:页面上"抖音号:xxx"或"抖音号:xxx"中冒号后的字符串是账号ID
- 小红书:页面上"小红书号:xxx"或"小红书号:xxx"中冒号后的字符串是账号ID(通常是字母数字组合如ABC123456)
【关于弹窗遮挡】:
- 如果页面有弹窗、对话框、遮罩层等遮挡了主要内容,请在返回结果中说明
- 如果因为遮挡无法看清账号信息,请设置 found: false,并在 navigationGuide 中说明"页面有弹窗遮挡,请关闭弹窗后重试"
【常见错误 - 请避免】:
- ❌ 不要把"关注数"(following)当成"作品数"
- ❌ 不要把"获赞数"当成"作品数"
- ✅ 作品数通常标注为"作品"、"视频"、"笔记"等
- ✅ 视频号的"关注者"就是粉丝数
- 如果页面上没有明确显示作品数量,请返回 worksCount: null
如果当前页面确实没有显示任何账号信息,请告诉我应该如何操作才能看到账号信息。
请严格按照以下JSON格式返回:
{
"found": true或false(是否找到账号信息,只要找到账号名称就算找到),
"accountName": "账号名称/昵称,如果找不到则为null",
"accountId": "账号ID(如视频号ID:xxx中的xxx、抖音号:xxx中的xxx、小红书号:xxx中的xxx),如果找不到则为null",
"avatarDescription": "头像描述,如果看不到则为null",
"fansCount": "粉丝数量(纯数字,如'关注者 1'则填1,'粉丝 100'则填100),如果看不到则为null",
"worksCount": "作品数量(纯数字,如'视频 4'则填4,'作品 10'则填10),如果看不到或不确定则为null",
"otherInfo": "其他相关信息,如果没有则为null",
"navigationGuide": "如果没找到账号信息,请描述具体的操作步骤(如:点击左侧菜单的'个人中心'),如果已找到则为null"
}`;

    // Parse a JSON string and map it onto the result shape. Throws when the
    // string is not valid JSON, which lets the caller decide whether to retry.
    const parseExtraction = (json: string): AccountInfoExtraction => {
      const result = JSON.parse(json);
      return {
        found: Boolean(result.found),
        accountName: result.accountName || undefined,
        accountId: result.accountId || undefined,
        avatarDescription: result.avatarDescription || undefined,
        // A count of 0 is real data; a plain `||` would silently drop it.
        fansCount: result.fansCount === 0 ? 0 : result.fansCount || undefined,
        worksCount: result.worksCount === 0 ? 0 : result.worksCount || undefined,
        otherInfo: result.otherInfo || undefined,
        navigationGuide: result.navigationGuide || result.navigationSuggestion || undefined,
      };
    };

    try {
      const response = await this.analyzeImage({
        imageBase64,
        prompt,
        maxTokens: 600,
      });

      const jsonMatch = response.match(/\{[\s\S]*\}/);
      if (jsonMatch) {
        const originalJson = jsonMatch[0];
        try {
          // 1. Strict parse of the reply as-is.
          return parseExtraction(originalJson);
        } catch {
          // 2. Repair common formatting problems and retry once.
          const repairedJson = originalJson
            // single-quoted property names
            .replace(/'([^']+)':/g, '"$1":')
            // single-quoted values
            .replace(/:\s*'([^']*)'/g, ': "$1"')
            // trailing commas before a closing brace/bracket
            .replace(/,(\s*[}\]])/g, '$1');
          try {
            return parseExtraction(repairedJson);
          } catch (innerError) {
            logger.error('extractAccountInfo JSON parse failed after fix attempt:', innerError);
            logger.debug('Original JSON:', originalJson);
            logger.debug('Fixed JSON:', repairedJson);
          }
        }
      }

      // No parsable JSON: hand the raw reply back as guidance text.
      return {
        found: false,
        navigationGuide: response,
      };
    } catch (error) {
      logger.error('extractAccountInfo error:', error);
      return {
        found: false,
        navigationGuide: '分析失败,请手动查看页面',
      };
    }
  }
  /**
   * Extract account information from a page screenshot plus its HTML
   * (enhanced variant).
   *
   * HTML longer than 8000 characters is reduced to short windows around the
   * first match of a few account-related patterns (video-channel ID text,
   * `finder-uniq-id`, `data-clipboard-text`, nickname/avatar classes); if none
   * match, the first 8000 characters are used. The prompt wording is geared
   * towards the WeChat video-channel page regardless of `platform`.
   *
   * A numeric `0` for `fansCount` / `worksCount` is preserved; other falsy
   * values become `undefined`. Parse and request failures are logged and
   * reported as `{ found: false }`.
   *
   * @param imageBase64 Base64-encoded page screenshot.
   * @param html Page HTML.
   * @param platform Platform name, interpolated into the prompt.
   * @returns Account information extraction result.
   */
  async extractAccountInfoWithHtml(
    imageBase64: string,
    html: string,
    platform: string
  ): Promise<AccountInfoExtraction> {
    // Keep the HTML part of the prompt bounded.
    const maxHtmlLength = 8000;
    let htmlSnippet = html;

    if (html.length > maxHtmlLength) {
      // Patterns that tend to sit next to account information.
      const patterns = [
        // Accept both the ASCII colon and the fullwidth colon (U+FF1A) after the label.
        /视频号ID[:\uFF1A]\s*([a-zA-Z0-9_]+)/,
        /finder-uniq-id[^>]*>([^<]+)/,
        /data-clipboard-text="([^"]+)"/,
        /class="[^"]*nickname[^"]*"[^>]*>([^<]+)/,
        /class="[^"]*avatar[^"]*"/,
      ];

      // Collect a window (200 chars before, 500 after the match start) per pattern.
      const relevantParts: string[] = [];
      for (const pattern of patterns) {
        const match = html.match(pattern);
        if (match) {
          const index = match.index ?? 0;
          const start = Math.max(0, index - 200);
          const end = Math.min(html.length, index + 500);
          relevantParts.push(html.substring(start, end));
        }
      }

      htmlSnippet =
        relevantParts.length > 0
          ? relevantParts.join('\n...\n')
          : html.substring(0, maxHtmlLength);
    }

    const prompt = `请分析以下${platform}平台的网页截图和 HTML 代码,提取账号信息。
【HTML 代码片段】
\`\`\`html
${htmlSnippet}
\`\`\`
【提取目标】
1. **账号ID**(最重要!):
- 视频号:查找 "视频号ID:xxx" 或 HTML 中的 data-clipboard-text 属性、finder-uniq-id 元素
- 示例:sphjl99GV2W1GgN(这种字母数字组合就是视频号ID)
2. 账号名称/昵称
3. 粉丝数量(视频号显示为"关注者 X")
4. 作品数量(视频号显示为"视频 X")
【特别注意】
- 视频号ID 通常是一串字母数字组合,如 "sphjl99GV2W1GgN"
- 在 HTML 中可能出现在 data-clipboard-text 属性中
- 或者在 class 为 finder-uniq-id 的元素内
请严格按照以下 JSON 格式返回:
{
"found": true,
"accountId": "视频号ID(如 sphjl99GV2W1GgN,不要加前缀)",
"accountName": "账号名称",
"fansCount": "粉丝数(纯数字)",
"worksCount": "作品数(纯数字)"
}`;

    try {
      const response = await this.analyzeImage({
        imageBase64,
        prompt,
        maxTokens: 500,
      });
      logger.info(`[AIService] extractAccountInfoWithHtml response:`, response);

      const jsonMatch = response.match(/\{[\s\S]*\}/);
      if (jsonMatch) {
        try {
          const result = JSON.parse(jsonMatch[0]);
          return {
            found: Boolean(result.found),
            accountName: result.accountName || undefined,
            accountId: result.accountId || undefined,
            avatarDescription: result.avatarDescription || undefined,
            // A count of 0 is real data; a plain `||` would silently drop it.
            fansCount: result.fansCount === 0 ? 0 : result.fansCount || undefined,
            worksCount: result.worksCount === 0 ? 0 : result.worksCount || undefined,
            otherInfo: result.otherInfo || undefined,
            navigationGuide: result.navigationGuide || undefined,
          };
        } catch (parseError) {
          logger.error('extractAccountInfoWithHtml JSON parse error:', parseError);
        }
      }

      return { found: false };
    } catch (error) {
      logger.error('extractAccountInfoWithHtml error:', error);
      return { found: false };
    }
  }
  /**
   * Ask the vision model what the next UI operation should be for a given goal.
   *
   * When the goal mentions uploading ('上传' or 'upload'), an extra block of
   * per-platform hints about upload entry points is added to the prompt. The
   * first `{...}` span of the reply is parsed as JSON; without one, the raw
   * reply is returned as `explanation`. Errors are logged and reported as
   * `hasAction: false` with "分析失败".
   *
   * @param imageBase64 Base64-encoded page screenshot.
   * @param platform Platform name, interpolated into the prompt.
   * @param goal Operation goal (e.g. "获取账号信息", "完成登录").
   * @returns Page operation guide.
   */
  async getPageOperationGuide(
    imageBase64: string,
    platform: string,
    goal: string
  ): Promise<PageOperationGuide> {
    // Goal-specific recognition hints (currently only for upload goals).
    const isUploadGoal = goal.includes('上传') || goal.includes('upload');
    const additionalHints = isUploadGoal
      ? `
【重要】关于视频上传入口的识别提示:
- 百家号:上传区域通常是一个虚线边框的大区域,包含云朵图标和"点击上传或将文件拖动入此区域"文字,整个虚线框区域都是可点击的上传入口
- 微信视频号:
* 如果在首页,需要找"发表视频"按钮(通常是橙色/红色按钮)
* 如果在发布页面(标题显示"发表动态"),左侧会有一个带"+"号的矩形上传区域,下方有格式说明文字(如"上传时长8小时内...MP4/H.264格式"),点击这个"+"号区域即可上传
- 小红书:上传区域通常有"上传视频"文字或拖拽区域
- 抖音:上传区域通常是一个带有"发布视频"或上传图标的区域
- 快手:找"上传视频"或拖拽上传区域
- B站:找"投稿"或上传视频按钮
如果页面不是发布页面,需要先找到进入发布页面的入口按钮(如"发表视频"、"发布"、"上传"、"投稿"等)。`
      : '';

    const prompt = `请分析这张${platform}平台的网页截图,我的目标是:${goal}
${additionalHints}
请告诉我下一步应该进行什么操作。仔细观察页面上的所有可点击元素,包括:
- 带有虚线边框的拖拽上传区域(这是很常见的上传入口)
- 带有"上传"、"发布"、"发表"、"投稿"等文字的按钮
- 带有云朵、加号、上传箭头等图标的区域
如果需要点击某个元素,请尽可能提供:
1. 操作类型(点击、输入、滚动、等待、跳转)
2. 目标元素的描述(文字内容、位置描述、视觉特征)
3. 如果是点击操作,估计目标在截图中的大致位置(假设截图尺寸为 1920x1080,给出x,y坐标)
4. 如果是输入操作,需要输入什么内容
请严格按照以下JSON格式返回:
{
"hasAction": true或false(是否需要执行操作),
"actionType": "click"或"input"或"scroll"或"wait"或"navigate"或null,
"targetDescription": "目标元素的文字描述(如:虚线框上传区域、发表视频按钮等)",
"targetSelector": "可能的CSS选择器,常见的有:[class*='upload'], [class*='drag'], button:has-text('发布'), button:has-text('上传') 等",
"targetPosition": {"x": 数字, "y": 数字} 或 null,
"inputText": "需要输入的文字,如果不需要输入则为null",
"explanation": "操作说明和原因"
}`;

    try {
      const reply = await this.analyzeImage({ imageBase64, prompt, maxTokens: 500 });

      const jsonSpan = reply.match(/\{[\s\S]*\}/);
      if (!jsonSpan) {
        // No JSON object in the reply: pass the raw text through.
        return {
          hasAction: false,
          explanation: reply,
        };
      }

      const parsed = JSON.parse(jsonSpan[0]);
      // Falsy values (null, '', missing) are normalised to undefined.
      const field = (key: string) => parsed[key] || undefined;

      return {
        hasAction: !!parsed.hasAction,
        actionType: field('actionType'),
        targetDescription: field('targetDescription'),
        targetSelector: field('targetSelector'),
        targetPosition: field('targetPosition'),
        inputText: field('inputText'),
        explanation: parsed.explanation || '无法解析操作指导',
      };
    } catch (error) {
      logger.error('getPageOperationGuide error:', error);
      return {
        hasAction: false,
        explanation: '分析失败',
      };
    }
  }
  /**
   * Analyse a screenshot to determine the state of a video upload.
   *
   * `progress` is only kept when the model returned a JSON number; anything
   * else becomes `null`. Without a `{...}` span in the reply, all flags are
   * false and the raw reply is returned as `statusDescription`. Errors are
   * logged and reported with "分析失败".
   *
   * @param imageBase64 Base64-encoded page screenshot.
   * @param platform Platform name, interpolated into the prompt.
   * @returns Upload progress analysis.
   */
  async analyzeUploadProgress(
    imageBase64: string,
    platform: string
  ): Promise<{
    isUploading: boolean;
    isComplete: boolean;
    isFailed: boolean;
    progress: number | null;
    statusDescription: string;
  }> {
    const prompt = `请分析这张${platform}平台的网页截图,判断视频上传的状态。
请仔细观察页面上是否有以下元素:
1. 上传进度条(通常显示百分比,如 "50%"、"上传中 75%" 等)
2. 上传完成标志(如 "上传成功"、"✓"、绿色勾选图标、"100%"、视频预览画面)
3. 上传失败标志(如 "上传失败"、"重试"、红色错误提示)
4. 视频处理中的提示(如 "处理中"、"转码中")
请严格按照以下JSON格式返回:
{
"isUploading": true或false(是否正在上传中,有进度条显示但未完成),
"isComplete": true或false(是否上传完成,进度达到100%或显示成功标志),
"isFailed": true或false(是否上传失败),
"progress": 数字或null(当前上传进度百分比,如果能识别到的话,范围0-100),
"statusDescription": "当前状态的文字描述"
}`;

    // Result used whenever nothing could be determined.
    const undetermined = (statusDescription: string) => ({
      isUploading: false,
      isComplete: false,
      isFailed: false,
      progress: null,
      statusDescription,
    });

    try {
      const reply = await this.analyzeImage({ imageBase64, prompt, maxTokens: 300 });

      const jsonSpan = reply.match(/\{[\s\S]*\}/);
      if (!jsonSpan) {
        return undetermined(reply);
      }

      const parsed = JSON.parse(jsonSpan[0]);
      const hasNumericProgress = typeof parsed.progress === 'number';

      return {
        isUploading: !!parsed.isUploading,
        isComplete: !!parsed.isComplete,
        isFailed: !!parsed.isFailed,
        progress: hasNumericProgress ? parsed.progress : null,
        statusDescription: parsed.statusDescription || '未知状态',
      };
    } catch (error) {
      logger.error('analyzeUploadProgress error:', error);
      return undetermined('分析失败');
    }
  }
  /**
   * Analyse a screenshot taken after the publish button was clicked and
   * report how far the publish has progressed.
   *
   * `progress` is only kept when the model returned a JSON number. Without a
   * `{...}` span in the reply, all flags are false and the raw reply becomes
   * `statusDescription`. Errors are logged and reported with "分析失败".
   *
   * @param imageBase64 Base64-encoded page screenshot.
   * @param platform Platform name, interpolated into the prompt.
   * @returns Publish progress analysis.
   */
  async analyzePublishProgress(
    imageBase64: string,
    platform: string
  ): Promise<{
    isPublishing: boolean;
    isComplete: boolean;
    isFailed: boolean;
    progress: number | null;
    needAction: boolean;
    actionDescription: string | null;
    statusDescription: string;
  }> {
    const prompt = `请分析这张${platform}平台的网页截图,判断点击发布按钮后的发布状态。
请仔细观察页面上是否有以下元素:
1. 发布/上传进度条(通常显示百分比,如 "发布中 50%"、"上传中 79%"、"正在发布..."、进度圈等)
2. 后台上传提示(如 "作品上传中,请勿关闭页面"、"上传完成后将自动发布" + 百分比进度)
3. 发布成功标志(如 "发布成功"、"已发布"、绿色勾选图标)
4. 发布失败标志(如 "发布失败"、"重试"、红色错误提示)
5. 需要处理的弹窗(如确认弹窗、验证码弹窗、协议确认等)
6. 正在处理中的提示(如 "视频处理中"、"审核中"、loading动画)
【重要】特别注意:
- 抖音等平台在点击发布后可能会跳转到作品管理页面,但页面右下角会有一个小的上传进度框显示"作品上传中,请勿关闭页面 XX%"
- 只要页面上有任何上传/发布进度条(无论在页面哪个位置),都应该认为发布尚未完成
- 只有当进度达到100%且没有任何进度提示时,才算发布完成
请严格按照以下JSON格式返回:
{
"isPublishing": true或false(是否正在发布/上传中,只要有进度条或loading显示就返回true),
"isComplete": true或false(是否完全完成,进度100%且无任何上传提示才返回true),
"isFailed": true或false(是否发布失败),
"progress": 数字或null(当前进度百分比,仔细查找页面上的百分比数字,范围0-100),
"needAction": true或false(是否需要用户处理某些操作,如确认弹窗),
"actionDescription": "需要执行的操作描述,如果不需要操作则为null",
"statusDescription": "当前状态的文字描述(包括进度信息)"
}`;

    // Result used whenever nothing could be determined.
    const undetermined = (statusDescription: string) => ({
      isPublishing: false,
      isComplete: false,
      isFailed: false,
      progress: null,
      needAction: false,
      actionDescription: null,
      statusDescription,
    });

    try {
      const reply = await this.analyzeImage({ imageBase64, prompt, maxTokens: 400 });

      const jsonSpan = reply.match(/\{[\s\S]*\}/);
      if (!jsonSpan) {
        return undetermined(reply);
      }

      const parsed = JSON.parse(jsonSpan[0]);
      const hasNumericProgress = typeof parsed.progress === 'number';

      return {
        isPublishing: !!parsed.isPublishing,
        isComplete: !!parsed.isComplete,
        isFailed: !!parsed.isFailed,
        progress: hasNumericProgress ? parsed.progress : null,
        needAction: !!parsed.needAction,
        actionDescription: parsed.actionDescription || null,
        statusDescription: parsed.statusDescription || '未知状态',
      };
    } catch (error) {
      logger.error('analyzePublishProgress error:', error);
      return undetermined('分析失败');
    }
  }
  /**
   * Analyse page HTML (text only, no screenshot) and return the next
   * operation to perform, including a CSS selector for the target element.
   *
   * `ensureAvailable()` is invoked before the try block, so whatever it throws
   * propagates to the caller. Failures of the chat request or JSON parsing are
   * logged and reported as `hasAction: false` with "分析失败".
   *
   * @param html Page HTML; it is passed through `simplifyHtml` before being sent.
   * @param platform Platform name, interpolated into the prompt.
   * @param goal Operation goal.
   * @returns Page operation guide.
   */
  async analyzeHtmlForOperation(
    html: string,
    platform: string,
    goal: string
  ): Promise<PageOperationGuide> {
    this.ensureAvailable();

    // Strip scripts, styles, etc. and cap the size before embedding in the prompt.
    const simplifiedHtml = this.simplifyHtml(html);

    const prompt = `你是一个网页自动化助手。我正在${platform}平台上操作,目标是:${goal}
以下是当前页面的HTML结构(已简化):
\`\`\`html
${simplifiedHtml}
\`\`\`
请分析这个页面,告诉我下一步应该进行什么操作来达成目标。
要求:
1. 识别页面当前状态(是否已登录、是否有验证码、是否有弹窗等)
2. 找出需要操作的目标元素(按钮、链接、输入框等)
3. 提供精确的 CSS 选择器来定位该元素
4. 选择器要尽可能唯一和稳定(优先使用 id、data-* 属性、唯一 class)
请严格按照以下JSON格式返回:
{
"hasAction": true或false,
"actionType": "click" | "input" | "scroll" | "wait" | null,
"targetSelector": "精确的CSS选择器,如 #login-btn 或 button[data-action='login'] 或 .login-button",
"targetDescription": "目标元素的描述",
"inputText": "如果是输入操作,需要输入的内容,否则为null",
"explanation": "操作说明和当前页面状态分析"
}
【重要】CSS选择器格式要求:
- 必须使用标准CSS选择器语法,不要使用jQuery语法
- 不要使用 :contains()(这是jQuery语法,不是标准CSS)
- 如果需要按文本匹配,使用 :has-text("文本") 格式,例如:button:has-text("发布")
- 优先使用 id 选择器,其次是 data-* 属性,再次是唯一的 class
- 如果有多个相似元素,使用更具体的选择器(如 :first-child, :nth-child(n))
- 如果页面已完成目标(如已登录成功),返回 hasAction: false`;

    try {
      const reply = await this.chat(prompt, undefined, this.models.chat);

      const jsonSpan = reply.match(/\{[\s\S]*\}/);
      if (!jsonSpan) {
        // No JSON object in the reply: pass the raw text through.
        return {
          hasAction: false,
          explanation: reply,
        };
      }

      const parsed = JSON.parse(jsonSpan[0]);
      // Falsy values (null, '', missing) are normalised to undefined.
      const field = (key: string) => parsed[key] || undefined;

      return {
        hasAction: !!parsed.hasAction,
        actionType: field('actionType'),
        targetDescription: field('targetDescription'),
        targetSelector: field('targetSelector'),
        inputText: field('inputText'),
        explanation: parsed.explanation || '无法解析操作指导',
      };
    } catch (error) {
      logger.error('analyzeHtmlForOperation error:', error);
      return {
        hasAction: false,
        explanation: '分析失败',
      };
    }
  }
  /**
   * Reduce an HTML document to something small enough to embed in a prompt.
   *
   * A fixed sequence of regex rewrites is applied in order, after which the
   * result is capped at 30000 characters: first by keeping only the `<body>`
   * contents when a body element is found, then by hard truncation with a
   * trailing marker. The returned string is trimmed.
   *
   * @param html Raw page HTML.
   * @returns Simplified HTML.
   */
  private simplifyHtml(html: string): string {
    // Applied strictly in this order; each entry is [pattern, replacement].
    const rewriteRules: Array<[RegExp, string]> = [
      // <script> elements including their content
      [/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, ''],
      // <style> elements including their content
      [/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, ''],
      // HTML comments
      [/<!--[\s\S]*?-->/g, ''],
      // SVG content (an empty tag is kept as a placeholder)
      [/<svg[^>]*>[\s\S]*?<\/svg>/gi, '<svg></svg>'],
      // <noscript> elements
      [/<noscript\b[^<]*(?:(?!<\/noscript>)<[^<]*)*<\/noscript>/gi, ''],
      // data: URIs such as base64 images
      [/data:[^"'\s]+/g, 'data:...'],
      // very long inline style attributes (100+ characters)
      [/style="[^"]{100,}"/gi, 'style="..."'],
      // runs of whitespace
      [/\s+/g, ' '],
    ];

    let output = html;
    for (const [pattern, replacement] of rewriteRules) {
      output = output.replace(pattern, replacement);
    }

    // Cap the total length to avoid exceeding the model's token limit.
    const lengthLimit = 30000;
    if (output.length > lengthLimit) {
      // Prefer the body contents over the whole document.
      const body = output.match(/<body[^>]*>([\s\S]*)<\/body>/i);
      if (body) {
        output = body[1];
      }
      // Still too long: cut and mark the truncation.
      if (output.length > lengthLimit) {
        output = output.substring(0, lengthLimit) + '\n... (HTML 已截断)';
      }
    }

    return output.trim();
  }
  1369. // ==================== 发布辅助 ====================
  /**
   * Analyse a screenshot of the publish page and classify the publish state.
   *
   * The first `{...}` span of the reply is parsed as JSON. `confidence` is
   * accepted as a number (or numeric string), clamped to 0-100, and defaults
   * to 50 only when it is missing or not numeric - a reported confidence of 0
   * is kept as 0. Without a JSON span the raw reply is returned as
   * `pageDescription` with confidence 30; on error the failure is logged and
   * confidence 0 is returned.
   *
   * @param imageBase64 Base64-encoded page screenshot.
   * @param platform Platform name, interpolated into the prompt.
   * @returns Publish status analysis.
   */
  async analyzePublishStatus(imageBase64: string, platform: string): Promise<PublishStatusAnalysis> {
    const prompt = `请分析这张${platform}平台视频发布页面的截图,判断当前的发布状态。
请仔细观察页面,判断:
1. 是否正在上传/处理视频(显示进度条、loading 等)
2. 是否发布成功(显示成功提示、跳转到作品列表等)
3. 是否发布失败(显示错误提示)
4. 是否需要输入验证码(图片验证码、滑块验证、短信验证码等)
5. 是否需要进行其他操作才能继续发布(如点击发布按钮、确认信息等)
请严格按照以下JSON格式返回:
{
"status": "uploading" 或 "processing" 或 "success" 或 "failed" 或 "need_captcha" 或 "need_action",
"captchaType": "image" 或 "sms" 或 "slider" 或 "other" 或 null(仅当 status 为 need_captcha 时填写),
"captchaDescription": "验证码的具体描述(如:请输入图片中的4位数字)",
"errorMessage": "错误信息(仅当 status 为 failed 时填写)",
"nextAction": {
"actionType": "click" 或 "input" 或 "wait",
"targetDescription": "需要操作的目标描述",
"targetSelector": "目标元素的CSS选择器(如果能推断的话)"
} 或 null,
"pageDescription": "当前页面状态的详细描述",
"confidence": 0-100 之间的数字,表示你对这个判断的信心程度
}
注意:
- 如果看到"发布成功"、"上传完成"等字样,status 应为 "success"
- 如果看到验证码输入框、滑块验证等,status 应为 "need_captcha"
- 如果页面显示发布按钮但还未点击,status 应为 "need_action"
- 如果页面正在加载或显示进度,status 应为 "uploading" 或 "processing"`;

    try {
      const response = await this.analyzeImage({
        imageBase64,
        prompt,
        maxTokens: 600,
      });

      const jsonMatch = response.match(/\{[\s\S]*\}/);
      if (jsonMatch) {
        const result = JSON.parse(jsonMatch[0]);

        // `result.confidence || 50` would turn an explicit 0 into 50 and let
        // non-numeric values through, so validate and clamp instead.
        const rawConfidence =
          typeof result.confidence === 'string' && result.confidence.trim() !== ''
            ? Number(result.confidence)
            : result.confidence;
        const confidence =
          typeof rawConfidence === 'number' && Number.isFinite(rawConfidence)
            ? Math.min(100, Math.max(0, rawConfidence))
            : 50;

        return {
          status: result.status || 'need_action',
          captchaType: result.captchaType || undefined,
          captchaDescription: result.captchaDescription || undefined,
          errorMessage: result.errorMessage || undefined,
          nextAction: result.nextAction || undefined,
          pageDescription: result.pageDescription || '无法解析页面状态',
          confidence,
        };
      }

      // No JSON object in the reply: low-confidence fallback with the raw text.
      return {
        status: 'need_action',
        pageDescription: response,
        confidence: 30,
      };
    } catch (error) {
      logger.error('analyzePublishStatus error:', error);
      return {
        status: 'need_action',
        pageDescription: '分析失败',
        confidence: 0,
      };
    }
  }
  /**
   * Analyse the publish page HTML and return the next operation needed to
   * complete publishing.
   *
   * The first `{...}` span of the reply is parsed as JSON; without one, the
   * raw reply is returned as `explanation`. Errors are logged and reported as
   * `hasAction: false` with "分析失败".
   *
   * @param html Page HTML; it is passed through `simplifyHtml` before being sent.
   * @param platform Platform name, interpolated into the prompt.
   * @param currentStatus Current publish status, interpolated into the prompt.
   * @returns Page operation guide.
   */
  async analyzePublishPageHtml(
    html: string,
    platform: string,
    currentStatus: string
  ): Promise<PageOperationGuide> {
    const simplifiedHtml = this.simplifyHtml(html);

    const prompt = `你是一个自动化发布助手。我正在${platform}平台上发布视频,当前状态是:${currentStatus}
以下是当前页面的HTML结构(已简化):
\`\`\`html
${simplifiedHtml}
\`\`\`
请分析这个页面,告诉我下一步应该进行什么操作来完成发布。
要求:
1. 如果需要点击"发布"按钮,找到正确的发布按钮
2. 如果需要输入验证码,找到验证码输入框
3. 如果需要确认/关闭弹窗,找到相应按钮
4. 提供精确的 CSS 选择器来定位目标元素
请严格按照以下JSON格式返回:
{
"hasAction": true或false,
"actionType": "click" | "input" | "wait" | null,
"targetSelector": "精确的CSS选择器",
"targetDescription": "目标元素的描述",
"inputText": "如果是输入操作,需要输入的内容,否则为null",
"explanation": "操作说明"
}`;

    try {
      const reply = await this.chat(prompt, undefined, this.models.chat);

      const jsonSpan = reply.match(/\{[\s\S]*\}/);
      if (!jsonSpan) {
        // No JSON object in the reply: pass the raw text through.
        return {
          hasAction: false,
          explanation: reply,
        };
      }

      const parsed = JSON.parse(jsonSpan[0]);
      // Falsy values (null, '', missing) are normalised to undefined.
      const field = (key: string) => parsed[key] || undefined;

      return {
        hasAction: !!parsed.hasAction,
        actionType: field('actionType'),
        targetDescription: field('targetDescription'),
        targetSelector: field('targetSelector'),
        inputText: field('inputText'),
        explanation: parsed.explanation || '无法解析操作指导',
      };
    } catch (error) {
      logger.error('analyzePublishPageHtml error:', error);
      return {
        hasAction: false,
        explanation: '分析失败',
      };
    }
  }
  1495. // ==================== 工具方法 ====================
  /**
   * Compute the cosine similarity of two vectors.
   *
   * Cosine similarity is undefined when either vector has zero magnitude
   * (including the empty-vector case); this returns 0 there instead of the
   * `NaN` that a plain 0/0 division would produce.
   *
   * @param vec1 First vector.
   * @param vec2 Second vector; must have the same length as `vec1`.
   * @returns dot(vec1, vec2) / (|vec1| * |vec2|), or 0 when either magnitude is 0.
   * @throws Error when the vectors differ in length.
   */
  private cosineSimilarity(vec1: number[], vec2: number[]): number {
    if (vec1.length !== vec2.length) {
      throw new Error('Vectors must have the same length');
    }

    let dotProduct = 0;
    let norm1 = 0;
    let norm2 = 0;
    for (let i = 0; i < vec1.length; i++) {
      dotProduct += vec1[i] * vec2[i];
      norm1 += vec1[i] * vec1[i];
      norm2 += vec2[i] * vec2[i];
    }

    const denominator = Math.sqrt(norm1) * Math.sqrt(norm2);
    // Zero-magnitude input: avoid returning NaN.
    if (denominator === 0) {
      return 0;
    }
    return dotProduct / denominator;
  }
  /**
   * Convert an arbitrary thrown value into an `Error`.
   *
   * `OpenAI.APIError` instances are mapped to a message chosen by HTTP status;
   * other `Error` instances are returned unchanged; anything else becomes a
   * generic "未知错误" error.
   *
   * @param error The caught value.
   * @returns An `Error` to surface to the caller.
   */
  private handleError(error: unknown): Error {
    if (!(error instanceof OpenAI.APIError)) {
      return error instanceof Error ? error : new Error('未知错误');
    }

    const { status, message } = error;

    // 400 is the only known status whose message includes the API's own text.
    if (status === 400) {
      return new Error(`请求参数错误: ${message}`);
    }

    // Statuses with a fixed message.
    const fixedMessages = new Map<number, string>([
      [401, 'API Key 无效或已过期'],
      [403, '没有权限访问该模型'],
      [404, '请求的模型不存在'],
      [429, '请求频率超限,请稍后重试'],
      [500, '服务器内部错误,请稍后重试'],
    ]);
    const fixedMessage = status === undefined ? undefined : fixedMessages.get(status);
    if (fixedMessage !== undefined) {
      return new Error(fixedMessage);
    }

    return new Error(`API 错误 (${status}): ${message}`);
  }
  1542. }
  // Module-level service instance shared by importers.
  const aiService = new QwenAIService();
  // Alias of the class itself, kept for backward compatibility.
  const AIService = QwenAIService;

  export { aiService, AIService };