// HeadlessBrowserService.ts
  1. /// <reference lib="dom" />
  2. import type { BrowserContext, Page } from 'playwright';
  3. import { launchBrowser } from '../automation/browserProvider.js';
  4. import { logger, safeStringify } from '../utils/logger.js';
  5. import {
  6. extractDeclaredNotesCountFromPostedResponse,
  7. extractLatestXiaohongshuFansCount,
  8. extractXiaohongshuProfileInfo,
  9. } from '../utils/xiaohongshu.js';
  10. import { extractPlatformWorkCoverUrl } from '../utils/platformWorkCover.js';
  11. import type { PlatformType } from '@media-manager/shared';
  /**
   * Douyin (creator platform) endpoints referenced by this service.
   */
  const DOUYIN_API = {
    /**
     * Login-state check. A response with `result: true` means logged in.
     * This is a relative path and has to be requested from inside a browser context.
     */
    CHECK_USER: '/aweme/v1/creator/check/user/',
    /** Work list (newer endpoint, supports pagination). */
    WORK_LIST: 'https://creator.douyin.com/janus/douyin/creator/pc/work_list',
    /** Comment list. */
    COMMENT_LIST: 'https://creator.douyin.com/web/api/third_party/aweme/api/comment/read/aweme/v1/web/comment/list/select/',
    /** Creator home page; used to trigger the login check. */
    CREATOR_HOME: 'https://creator.douyin.com/creator-micro/home',
  };
  /**
   * Per-platform configuration for checking cookies with a direct HTTP request
   * (no browser involved).
   *
   * - `checkUrl`: endpoint that is requested with the account's cookies.
   * - `isValidResponse`: returns `true` when the parsed JSON body indicates
   *   that the cookies are still valid.
   */
  const PLATFORM_API_CONFIG: Record<string, {
    checkUrl: string;
    isValidResponse: (data: unknown) => boolean;
  }> = {
    douyin: {
      // The account base-info endpoint is used to probe cookie validity.
      checkUrl: 'https://creator.douyin.com/web/api/creator/mcn/account_base_info?show_mcn_status=1',
      isValidResponse: (data: unknown) => {
        const resp = data as { status_code?: number; BaseResp?: { StatusCode?: number } };
        // A status code of 0 (in either response shape) means the cookie is valid.
        return resp?.status_code === 0 || resp?.BaseResp?.StatusCode === 0;
      },
    },
    baijiahao: {
      // The appinfo endpoint is used to probe cookie validity.
      checkUrl: 'https://baijiahao.baidu.com/builder/app/appinfo',
      isValidResponse: (data: unknown) => {
        const resp = data as {
          errno?: number;
          errmsg?: string;
          data?: {
            user?: {
              name?: string;
              app_id?: string | number;
              userid?: number;
              status?: string;
            };
          };
        };
        const user = resp?.data?.user;

        logger.info(`[Baijiahao] API response: errno=${resp?.errno}, errmsg=${resp?.errmsg}, user.name=${user?.name}, user.app_id=${user?.app_id}, user.status=${user?.status}`);

        // errno 0 means the request itself succeeded.
        const isErrnoOk = resp?.errno === 0;
        // User info (name or app_id) must be present.
        const hasUserInfo = !!(user?.name || user?.app_id);
        // A missing status is accepted; otherwise it must be one of the known
        // "normal" states, which rejects e.g. a banned account.
        const userStatus = user?.status;
        const isStatusOk = !userStatus || ['audit', 'pass', 'active', 'normal'].includes(String(userStatus));

        const isValid = isErrnoOk && hasUserInfo && isStatusOk;
        if (!isValid) {
          logger.warn(`[Baijiahao] Cookie invalid: errno=${resp?.errno}, hasUserInfo=${hasUserInfo}, status=${userStatus}`);
        }
        return isValid;
      },
    },
  };
  /**
   * Account profile data.
   */
  export interface AccountInfo {
    accountId: string;
    accountName: string;
    avatarUrl: string;
    /** Follower count; optional. */
    fansCount?: number;
    worksCount: number;
    /** Works belonging to the account, when they were collected. */
    worksList?: WorkItem[];
    /** NOTE(review): presumably `true` when `worksList` contains every work — confirm against the producer. */
    worksListComplete?: boolean;
    /** NOTE(review): presumably the path that produced this record — confirm against the producer. */
    source?: 'playwright' | 'api';
  }
  /**
   * A single published work (video/post) together with its counters.
   */
  export interface WorkItem {
    videoId?: string;
    title: string;
    coverUrl: string;
    /** Playback/detail page URL of the work; synced to `works.video_url`. */
    videoUrl?: string;
    duration: string;
    publishTime: string;
    status: string;
    playCount: number;
    likeCount: number;
    commentCount: number;
    shareCount: number;
    collectCount?: number;
  }
  /**
   * A single comment, optionally carrying the parent comment and the work it
   * belongs to.
   */
  export interface CommentItem {
    commentId: string;
    authorId: string;
    authorName: string;
    authorAvatar: string;
    content: string;
    likeCount: number;
    commentTime: string;
    /** Set when this comment has a parent comment. */
    parentCommentId?: string;
    videoId?: string;
    videoTitle?: string;
    videoCoverUrl?: string;
  }
  /**
   * The comments collected for one work, together with that work's identity.
   */
  export interface WorkComments {
    videoId: string;
    videoTitle: string;
    videoCoverUrl: string;
    comments: CommentItem[];
  }
  /**
   * A stored browser cookie as accepted by this service.
   */
  export interface CookieData {
    name: string;
    value: string;
    domain: string;
    path: string;
    /** Free-form SameSite value; optional. */
    sameSite?: string;
  }
  /** Which mechanism produced a cookie check result. */
  export type CookieCheckSource = 'api' | 'browser';

  /** Outcome category of a cookie check. */
  export type CookieCheckReason = 'valid' | 'need_login' | 'risk_control' | 'uncertain';

  /**
   * Result of a cookie status check.
   */
  export interface CookieCheckResult {
    isValid: boolean;
    needReLogin: boolean;
    uncertain: boolean;
    reason: CookieCheckReason;
    source: CookieCheckSource;
    /** Optional human-readable detail. */
    message?: string;
  }
  134. /**
  135. * 无头浏览器服务 - 用于后台静默获取账号信息
  136. */
  137. class HeadlessBrowserService {
  /**
   * Boolean wrapper around `checkCookieStatus`.
   *
   * @param platform Platform the cookies belong to.
   * @param cookies Cookies to check.
   * @returns `true` when the status is valid OR uncertain, so an inconclusive
   *   check is not reported as an invalid cookie.
   */
  async checkCookieValid(platform: PlatformType, cookies: CookieData[]): Promise<boolean> {
    const { isValid, uncertain } = await this.checkCookieStatus(platform, cookies);
    return isValid || uncertain;
  }
  /**
   * Check the cookie status (valid / needs re-login / uncertain).
   *
   * Platforms that have an entry in `PLATFORM_API_CONFIG` are checked through
   * `checkCookieStatusByApi`; every other platform goes straight to
   * `checkCookieStatusByBrowser`.
   *
   * @param platform Platform the cookies belong to.
   * @param cookies Cookies to check.
   * @returns The detailed check result.
   */
  async checkCookieStatus(platform: PlatformType, cookies: CookieData[]): Promise<CookieCheckResult> {
    logger.info(`[checkCookieStatus] Checking cookie for ${platform}, cookie count: ${cookies.length}`);

    const apiConfig = PLATFORM_API_CONFIG[platform];
    const via = apiConfig ? 'API' : 'Browser';
    const result = apiConfig
      ? await this.checkCookieStatusByApi(platform, cookies, apiConfig)
      : await this.checkCookieStatusByBrowser(platform, cookies);

    logger.info(
      `[checkCookieStatus] ${via} result for ${platform}: isValid=${result.isValid}, needReLogin=${result.needReLogin}, uncertain=${result.uncertain}, reason=${result.reason}`
    );
    return result;
  }
  /**
   * Report whether `text` contains any risk-control / verification keyword.
   * Matching is a case-insensitive substring search.
   *
   * NOTE(review): several keywords ('safe', 'security', 'verify', 'protect',
   * 'risk') are very broad substrings and may also occur in ordinary payload
   * text — confirm that this is acceptable for every caller.
   *
   * @param text Text to scan; an empty string yields `false`.
   * @returns `true` when at least one keyword occurs in `text`.
   */
  private containsRiskKeywords(text: string): boolean {
    if (!text) return false;

    // Every keyword literal is already lowercase, so only the haystack needs folding.
    const riskKeywords = [
      '验证码',
      '安全验证',
      '人机验证',
      '滑块',
      '风控',
      '风险',
      '访问受限',
      '行为异常',
      '系统检测到异常',
      '安全校验',
      'captcha',
      'verify',
      'challenge',
      'risk',
      'security',
      'safe',
      'protect',
      'blocked',
    ];
    const haystack = text.toLowerCase();
    return riskKeywords.some(keyword => haystack.includes(keyword));
  }
  /**
   * Read the text content of the page's `<body>` without ever throwing.
   *
   * @param page Playwright page to read from.
   * @returns At most the first 8000 characters of the body text, or `''` when
   *   the body has no text or reading it fails.
   */
  private async getPageBodyTextSafe(page: Page): Promise<string> {
    try {
      const bodyText = await page.textContent('body');
      return (bodyText ?? '').slice(0, 8000);
    } catch {
      // Best effort: any failure is reported as "no text".
      return '';
    }
  }
  /**
   * Check the login state by calling the platform's HTTP API with the cookies.
   *
   * Decision order:
   *  1. The platform validator accepts the response: `valid`, unless the
   *     serialized response contains a risk keyword, then `risk_control`.
   *  2. The status code is 2 or 8: `need_login`.
   *  3. Baijiahao only: errno 110 gives `need_login`; any other errno falls
   *     back to the browser check.
   *  4. Anything else, and any error thrown while requesting or parsing,
   *     falls back to the browser check.
   *
   * @param platform Platform the cookies belong to.
   * @param cookies Cookies sent with the request.
   * @param apiConfig Check URL and response validator for the platform.
   * @returns The check result, from the API or from the browser fallback.
   */
  private async checkCookieStatusByApi(
    platform: PlatformType,
    cookies: CookieData[],
    apiConfig: typeof PLATFORM_API_CONFIG[string]
  ): Promise<CookieCheckResult> {
    try {
      // Build the Cookie header from all cookies.
      const cookieString = cookies.map(c => `${c.name}=${c.value}`).join('; ');
      logger.info(`Checking cookie for ${platform}, cookie count: ${cookies.length}`);

      // Issue the request with browser-like headers.
      const response = await fetch(apiConfig.checkUrl, {
        method: 'GET',
        headers: {
          'Accept': 'application/json, text/plain, */*',
          'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
          'Cache-Control': 'no-cache',
          'Cookie': cookieString,
          'Pragma': 'no-cache',
          'Referer': this.getPlatformReferer(platform),
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
          'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
          'sec-ch-ua-mobile': '?0',
          'sec-ch-ua-platform': '"Windows"',
          'sec-fetch-dest': 'empty',
          'sec-fetch-mode': 'cors',
          'sec-fetch-site': 'same-origin',
        },
      });

      // A non-JSON body makes json() throw, which lands in the catch below.
      const data: unknown = await response.json();
      const serialized = JSON.stringify(data);
      logger.info(`[API] Raw response for ${platform}:`, serialized.substring(0, 500));

      const isValid = apiConfig.isValidResponse(data);
      const payload = data as
        | { status_code?: number; errno?: number; ret?: { errno?: number } }
        | null;
      const statusCode = payload?.status_code ?? payload?.errno ?? payload?.ret?.errno;
      logger.info(`API check cookie for ${platform}: valid=${isValid}, statusCode=${statusCode}`);

      if (isValid) {
        // NOTE(review): the keyword scan runs over an otherwise valid payload, so a
        // benign field containing e.g. "verify" or "safe" is reported as risk control.
        if (this.containsRiskKeywords(serialized.slice(0, 2000))) {
          return {
            isValid: false,
            needReLogin: true,
            uncertain: false,
            reason: 'risk_control',
            source: 'api',
            message: 'API 返回疑似风控/验证页面',
          };
        }
        return {
          isValid: true,
          needReLogin: false,
          uncertain: false,
          reason: 'valid',
          source: 'api',
        };
      }

      // From here on the validator has rejected the response.
      // Douyin: status_code 2 or 8 usually means not logged in / login expired.
      // NOTE(review): statusCode falls back to errno, so these two codes also
      // apply to Baijiahao's errno — confirm that is intended.
      if (statusCode === 2 || statusCode === 8) {
        logger.info(`[API] Platform ${platform} clearly not logged in (statusCode=${statusCode})`);
        return {
          isValid: false,
          needReLogin: true,
          uncertain: false,
          reason: 'need_login',
          source: 'api',
        };
      }

      // Baijiahao: a plain HTTP call may return errno !== 0 (e.g. dispersed
      // authentication) although the cookie still works in a browser, so only
      // errno 110 is treated as conclusive; everything else is re-checked in a browser.
      if (platform === 'baijiahao') {
        const errno = payload?.errno;
        if (errno === 110) {
          logger.warn(`[API] Baijiahao errno=110 (not logged in), cookie invalid`);
          return { isValid: false, needReLogin: true, uncertain: false, reason: 'need_login', source: 'api' };
        }
        if (errno === 0) {
          logger.warn(`[API] Baijiahao errno=0 but no user info, falling back to browser check`);
        } else {
          logger.info(`[API] Baijiahao errno=${errno}, falling back to browser check (may be dispersed auth)`);
        }
        // Returned without `await`: a rejection from the fallback propagates to the
        // caller instead of re-entering the catch below.
        return this.checkCookieStatusByBrowser(platform, cookies);
      }

      // Inconclusive status (e.g. status_code=7): fall back to the browser check.
      logger.info(`[API] Uncertain status for ${platform} (statusCode=${statusCode}), falling back to browser check`);
      return this.checkCookieStatusByBrowser(platform, cookies);
    } catch (error) {
      logger.error(`API check cookie error for ${platform}:`, error);
      // The API check itself failed: fall back to the browser check.
      return this.checkCookieStatusByBrowser(platform, cookies);
    }
  }
  /**
   * Look up the `Referer` header value for a platform.
   *
   * @param platform Platform identifier.
   * @returns The platform's referer URL, or `''` when none is configured.
   */
  private getPlatformReferer(platform: PlatformType): string {
    const refererByPlatform: Record<string, string> = {
      douyin: 'https://creator.douyin.com/',
      bilibili: 'https://member.bilibili.com/',
      kuaishou: 'https://cp.kuaishou.com/',
      baijiahao: 'https://baijiahao.baidu.com/',
    };
    return refererByPlatform[platform] || '';
  }
  /**
   * Rewrites each cookie's `sameSite` into one of the canonical spellings
   * `Strict` / `Lax` / `None` before the cookies are handed to Playwright.
   *
   * Matching is case-insensitive and `no_restriction` is treated as `None`.
   * A missing/empty value or `unspecified` is dropped silently; any other
   * value is dropped with a warning. Dropped means the returned cookie has no
   * `sameSite` property at all.
   *
   * @param cookies Cookies to normalize; the input objects are not mutated.
   * @returns New cookie objects with a canonical (or omitted) `sameSite`.
   */
  private normalizePlaywrightCookies(
    cookies: CookieData[]
  ): Array<Omit<CookieData, 'sameSite'> & { sameSite?: 'Strict' | 'Lax' | 'None' }> {
    return cookies.map((entry) => {
      const { sameSite, ...rest } = entry;
      let canonical: 'Strict' | 'Lax' | 'None' | undefined;

      if (sameSite) {
        switch (sameSite.toLowerCase()) {
          case 'strict':
            canonical = 'Strict';
            break;
          case 'lax':
            canonical = 'Lax';
            break;
          case 'none':
          case 'no_restriction':
            canonical = 'None';
            break;
          case 'unspecified':
            // Known "no value" marker: omit without warning.
            break;
          default:
            logger.warn(`[Cookie] Invalid sameSite value: ${sameSite}, omitting sameSite`);
        }
      }

      if (canonical) {
        return { ...rest, sameSite: canonical };
      }
      return rest;
    });
  }
  /**
   * Checks whether cookies are still valid by opening the platform home page
   * in a headless browser and looking at where the navigation ends up.
   *
   * - `douyin` is delegated to `checkDouyinLoginStatusByApi`.
   * - `weixin_video` is delegated to `checkWeixinVideoLoginStatusByBrowser`,
   *   which receives the page/context/browser and closes them itself.
   * - For every other platform the final URL is matched against the
   *   platform's `loginIndicators`, and the URL and body text are scanned for
   *   risk-control keywords.
   *
   * Errors (navigation failure, timeout, ...) never produce a definite
   * "expired" verdict: they yield an `uncertain` result
   * (`isValid: false, needReLogin: false, uncertain: true`) so the caller can
   * keep the account's previous state.
   *
   * @param platform Platform to check.
   * @param cookies Cookies to load into the browser context.
   * @returns The check result with `source: 'browser'`.
   */
  private async checkCookieStatusByBrowser(platform: PlatformType, cookies: CookieData[]): Promise<CookieCheckResult> {
    // Douyin is checked through its check/user endpoint instead.
    if (platform === 'douyin') {
      return this.checkDouyinLoginStatusByApi(cookies);
    }
    const browser = await launchBrowser({ headless: true });
    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        locale: 'zh-CN',
        timezoneId: 'Asia/Shanghai',
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      });
      await context.addCookies(this.normalizePlaywrightCookies(cookies));
      const page = await context.newPage();
      const config = this.getPlatformConfig(platform);
      // #6065: weixin_video may not redirect to a login page when the cookie
      // is invalid, so it uses a dedicated check. That method owns and closes
      // the page/context/browser, which is why cleanup here is not in `finally`.
      if (platform === 'weixin_video') {
        return this.checkWeixinVideoLoginStatusByBrowser(page, context, cookies, browser);
      }
      // Open the platform home page.
      await page.goto(config.homeUrl, {
        waitUntil: 'domcontentloaded',
        timeout: 30000,
      });
      await page.waitForTimeout(3000);
      const url = page.url();
      logger.info(`Browser check cookie for ${platform}: URL=${url}`);
      // Were we redirected to a login page?
      const isLoginPage = config.loginIndicators.some(indicator => url.includes(indicator));
      const bodyText = await this.getPageBodyTextSafe(page);
      const isRiskControl = this.containsRiskKeywords(url) || this.containsRiskKeywords(bodyText);
      await page.close();
      await context.close();
      await browser.close();
      if (isLoginPage) {
        return {
          isValid: false,
          needReLogin: true,
          uncertain: false,
          reason: 'need_login',
          source: 'browser',
        };
      }
      if (isRiskControl) {
        return {
          isValid: false,
          needReLogin: true,
          uncertain: false,
          reason: 'risk_control',
          source: 'browser',
          message: '检测到风控/验证页面',
        };
      }
      return {
        isValid: true,
        needReLogin: false,
        uncertain: false,
        reason: 'valid',
        source: 'browser',
      };
    } catch (error) {
      logger.error(`Browser check cookie error for ${platform}:`, error);
      // Best-effort cleanup: a failure to close must not replace the
      // "uncertain" result with a rejected promise.
      try {
        await browser.close();
      } catch { /* ignore */ }
      return {
        isValid: false,
        needReLogin: false,
        uncertain: true,
        reason: 'uncertain',
        source: 'browser',
        message: error instanceof Error ? error.message : 'Browser check error',
      };
    }
  }
  /**
   * Douyin login check driven by the check/user endpoint.
   *
   * Opens the creator home page with the given cookies and listens for the
   * check/user response; the account counts as logged in when that response
   * has `result === true` and `status_code === 0`. If no such response is seen
   * within about 10 seconds, the final URL is used instead (not containing
   * "login" or "passport" means logged in). Risk-control keywords in the URL
   * or body text override either outcome.
   *
   * @param cookies Cookies to load into the browser context.
   * @returns A definite result with `source: 'browser'`, or an `uncertain`
   *          result when the check itself throws.
   */
  private async checkDouyinLoginStatusByApi(cookies: CookieData[]): Promise<CookieCheckResult> {
    const browser = await launchBrowser({ headless: true });
    let isLoggedIn = false;
    let checkCompleted = false;
    let page: import('playwright').Page | null = null;

    // Response listener: only reacts to the check/user endpoint.
    const checkUserHandler = async (response: import('playwright').Response) => {
      if (!response.url().includes(DOUYIN_API.CHECK_USER)) {
        return;
      }
      try {
        const payload = await response.json();
        // result: true means logged in
        isLoggedIn = payload?.result === true && payload?.status_code === 0;
        checkCompleted = true;
        logger.info(`[Douyin] check/user API response: result=${payload?.result}, status_code=${payload?.status_code}, isLoggedIn=${isLoggedIn}`);
      } catch {
        // Parse failures are ignored.
      }
    };

    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        locale: 'zh-CN',
        timezoneId: 'Asia/Shanghai',
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      });
      await context.addCookies(this.normalizePlaywrightCookies(cookies));

      const tab = await context.newPage();
      page = tab;
      tab.on('response', checkUserHandler);

      // Loading the creator home page triggers the check/user request.
      await tab.goto(DOUYIN_API.CREATOR_HOME, {
        waitUntil: 'domcontentloaded',
        timeout: 30000,
      });

      // Poll until the listener has fired or the 10 s budget is used up.
      const deadline = Date.now() + 10000;
      while (!checkCompleted && Date.now() < deadline) {
        await tab.waitForTimeout(500);
      }

      // No check/user response: fall back to a login-redirect check on the URL.
      if (!checkCompleted) {
        const currentUrl = tab.url();
        isLoggedIn = !currentUrl.includes('login') && !currentUrl.includes('passport');
        logger.info(`[Douyin] No check/user response, fallback to URL check: ${currentUrl}, isLoggedIn=${isLoggedIn}`);
      }

      const finalUrl = tab.url();
      const bodyText = await this.getPageBodyTextSafe(tab);
      const isRiskControl = this.containsRiskKeywords(finalUrl) || this.containsRiskKeywords(bodyText);

      await tab.close();
      await context.close();
      await browser.close();

      if (isRiskControl) {
        return {
          isValid: false,
          needReLogin: true,
          uncertain: false,
          reason: 'risk_control',
          source: 'browser',
          message: '检测到风控/验证页面',
        };
      }

      if (isLoggedIn) {
        return {
          isValid: true,
          needReLogin: false,
          uncertain: false,
          reason: 'valid',
          source: 'browser',
        };
      }
      return {
        isValid: false,
        needReLogin: true,
        uncertain: false,
        reason: 'need_login',
        source: 'browser',
      };
    } catch (error) {
      logger.error('[Douyin] checkDouyinLoginByApi error:', error);
      try {
        await browser.close();
      } catch { }
      return {
        isValid: false,
        needReLogin: false,
        uncertain: true,
        reason: 'uncertain',
        source: 'browser',
        message: error instanceof Error ? error.message : 'Douyin check error',
      };
    } finally {
      // Detach the listener so it cannot keep the closure alive.
      if (page) {
        page.off('response', checkUserHandler);
      }
    }
  }
  /**
   * #6065: Weixin Video (channels) login check based on page content.
   *
   * An invalid cookie may leave the URL unchanged instead of redirecting to a
   * login page, so a positive signal is required before declaring the cookie
   * valid. Order of checks: login-page URL, risk-control keywords, then
   * account elements / management text. If none of them match, the result is
   * `uncertain`.
   *
   * This method closes the page, context and browser it is given on every
   * path (only the browser on the error path).
   *
   * @param page Page to navigate.
   * @param context Browser context owning the page.
   * @param _cookies Unused.
   * @param browser Browser to close when done.
   * @returns The check result with `source: 'browser'`.
   */
  private async checkWeixinVideoLoginStatusByBrowser(
    page: Page,
    context: BrowserContext,
    _cookies: CookieData[],
    browser: import('playwright').Browser
  ): Promise<CookieCheckResult> {
    // Shared teardown; always awaited inside the try block so that a close
    // failure is still handled by the catch below.
    const dispose = async (): Promise<void> => {
      await page.close();
      await context.close();
      await browser.close();
    };

    try {
      // The creator platform needs time to render before its content can be inspected.
      await page.goto('https://channels.weixin.qq.com/platform', {
        waitUntil: 'domcontentloaded',
        timeout: 30000,
      });
      await page.waitForTimeout(5000);
      // Give the page a chance to reach network idle; a timeout is not an error.
      try {
        await page.waitForLoadState('networkidle', { timeout: 15000 });
      } catch {
        // Timed out: carry on.
      }

      const url = page.url();
      const bodyText = await this.getPageBodyTextSafe(page);
      logger.info(`[Weixin Video] Browser check: URL=${url}, bodyLen=${bodyText.length}`);

      // Explicit login-page markers in the URL.
      const loginUrlMarkers = ['login.html', '/login?', 'passport'];
      if (loginUrlMarkers.some(marker => url.includes(marker))) {
        logger.info('[Weixin Video] Redirected to login page');
        await dispose();
        return { isValid: false, needReLogin: true, uncertain: false, reason: 'need_login', source: 'browser' };
      }

      // Risk-control detection.
      if (this.containsRiskKeywords(url) || this.containsRiskKeywords(bodyText)) {
        logger.info('[Weixin Video] Detected risk control keywords');
        await dispose();
        return { isValid: false, needReLogin: true, uncertain: false, reason: 'risk_control', source: 'browser', message: '检测到风控/验证页面' };
      }

      // #6065: positive signals - elements that only show up for a logged-in account.
      const positiveSignals = [
        '.finder-nickname',       // channel nickname
        '.avatar img[src]',       // avatar image
        '[class*="video-count"]', // video count
        '[class*="follower"]',    // followers
        'div.title-name',         // account name
      ];
      let hasPositiveSignal = false;
      for (const selector of positiveSignals) {
        try {
          const matches = await page.locator(selector).count();
          if (matches <= 0) {
            continue;
          }
          hasPositiveSignal = true;
          logger.info(`[Weixin Video] Found positive signal: ${selector}`);
          break;
        } catch {
          // Try the next selector.
        }
      }

      // Additional check: text that appears on the management pages.
      const managementPhrases = ['发表视频', '数据中心', '互动管理', '内容管理'];
      const hasManagementText = managementPhrases.some(phrase => bodyText.includes(phrase));

      if (hasPositiveSignal || hasManagementText) {
        logger.info(`[Weixin Video] Cookie valid (positive signal=${hasPositiveSignal}, managementText=${hasManagementText})`);
        await dispose();
        return { isValid: true, needReLogin: false, uncertain: false, reason: 'valid', source: 'browser' };
      }

      // Neither positive nor negative evidence: report uncertain (slow page or network trouble are possible causes).
      logger.warn(`[Weixin Video] No positive or negative signals detected, marking as uncertain`);
      await dispose();
      return { isValid: false, needReLogin: false, uncertain: true, reason: 'uncertain', source: 'browser', message: '无法确定视频号登录状态' };
    } catch (error) {
      logger.error('[Weixin Video] checkLoginByBrowser error:', error);
      try {
        await browser.close();
      } catch { /* ignore */ }
      return { isValid: false, needReLogin: false, uncertain: true, reason: 'uncertain', source: 'browser', message: error instanceof Error ? error.message : 'Weixin video check error' };
    }
  }
  /**
   * Opens the platform home page with the given cookies and takes a
   * screenshot of the viewport (intended for AI analysis).
   *
   * @param platform Platform whose home page is opened.
   * @param cookies Cookies to load into the browser context.
   * @returns The screenshot as a Base64-encoded JPEG, or `null` on any failure.
   */
  async capturePageScreenshot(platform: PlatformType, cookies: CookieData[]): Promise<string | null> {
    const browser = await launchBrowser({ headless: true });
    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        locale: 'zh-CN',
        timezoneId: 'Asia/Shanghai',
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      });
      await context.addCookies(this.normalizePlaywrightCookies(cookies));
      const page = await context.newPage();

      const platformConfig = this.getPlatformConfig(platform);
      logger.info(`[Screenshot] Navigating to ${platform} home page: ${platformConfig.homeUrl}`);

      // Load the home page, then allow a fixed settling delay.
      await page.goto(platformConfig.homeUrl, {
        waitUntil: 'domcontentloaded',
        timeout: 30000,
      });
      await page.waitForTimeout(3000);
      logger.info(`[Screenshot] Current URL: ${page.url()}`);

      // Viewport-only JPEG, encoded as Base64.
      const imageBuffer = await page.screenshot({
        type: 'jpeg',
        quality: 80,
        fullPage: false,
      });
      const encoded = imageBuffer.toString('base64');

      await page.close();
      await context.close();
      await browser.close();

      const sizeKb = Math.round(encoded.length / 1024);
      logger.info(`[Screenshot] Captured screenshot for ${platform}, size: ${sizeKb}KB`);
      return encoded;
    } catch (error) {
      logger.error(`[Screenshot] Failed to capture screenshot for ${platform}:`, error);
      try {
        await browser.close();
      } catch { /* ignore */ }
      return null;
    }
  }
  /**
   * Fetches account information for a platform.
   *
   * For `baijiahao` the direct API is tried first; its result is returned
   * (tagged `source: 'api'`) only when it reports at least one work, either
   * through `worksList` or `worksCount`. In every other case, including a
   * direct-API failure, the Playwright path is used and the result is tagged
   * `source: 'playwright'`.
   *
   * @param platform Platform to query.
   * @param cookies Cookies for the account.
   * @param options Accepted for interface compatibility; not used by this method.
   * @returns The account info, with `source` set to the path that produced it.
   */
  async fetchAccountInfo(
    platform: PlatformType,
    cookies: CookieData[],
    options?: {
      onWorksFetchProgress?: (info: {
        platform: PlatformType;
        page: string;
        pageSize: number;
        fetched: number;
        newCount: number;
        totalSoFar: number;
        declaredTotal?: number;
        hasMore: boolean;
        nextPage?: unknown;
      }) => void;
    }
  ): Promise<AccountInfo> {
    logger.info(`[fetchAccountInfo] Starting for platform: ${platform}`);
    // Marks the parameter as intentionally unused.
    void options;

    if (platform === 'baijiahao') {
      try {
        const apiInfo = await this.fetchBaijiahaoAccountInfoDirectApi(cookies);
        apiInfo.source = 'api';
        const listedWorks = apiInfo.worksList?.length || 0;
        const hasWorks = listedWorks > 0 || apiInfo.worksCount > 0;
        if (hasWorks) {
          return apiInfo;
        }
      } catch (apiError) {
        logger.warn(`[Baijiahao] Direct API failed, falling back to Playwright:`, apiError);
      }
    }

    logger.info(`[Playwright] Fetching account info for ${platform}`);
    const browserInfo = await this.fetchAccountInfoWithPlaywright(platform, cookies);
    browserInfo.source = 'playwright';
    return browserInfo;
  }
  /**
   * Fetches account info by driving a headless browser with the given cookies
   * and dispatching to the platform-specific fetcher.
   *
   * Cookies go through `normalizePlaywrightCookies`, the same helper the other
   * browser-based methods in this class use, so `sameSite` handling is
   * consistent everywhere (case-insensitive match, `no_restriction` treated as
   * `None`, unknown values omitted).
   *
   * The browser is closed in `finally` on a best-effort basis, so a cleanup
   * failure can neither discard info that was fetched successfully nor turn
   * the default-info fallback into a rejection. Closing the browser also
   * closes its contexts and pages.
   *
   * @param platform Platform to fetch for; platforms without a dedicated
   *                 fetcher get `getDefaultAccountInfo(platform)`.
   * @param cookies Cookies to load into the browser context.
   * @returns The fetched account info, or the default account info when
   *          anything throws.
   */
  private async fetchAccountInfoWithPlaywright(platform: PlatformType, cookies: CookieData[]): Promise<AccountInfo> {
    const browser = await launchBrowser({ headless: true });
    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        locale: 'zh-CN',
        timezoneId: 'Asia/Shanghai',
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      });
      // Playwright only accepts Strict/Lax/None for sameSite.
      await context.addCookies(this.normalizePlaywrightCookies(cookies));
      const page = await context.newPage();

      // Every fetcher is awaited here so that it has finished before the
      // `finally` block closes the browser.
      switch (platform) {
        case 'douyin':
          return await this.fetchDouyinAccountInfo(page, context, cookies);
        case 'bilibili':
          return await this.fetchBilibiliAccountInfo(page, context, cookies);
        case 'kuaishou':
          return await this.fetchKuaishouAccountInfo(page, context, cookies);
        case 'xiaohongshu':
          return await this.fetchXiaohongshuAccountInfo(page, context, cookies);
        case 'weixin_video':
          return await this.fetchWeixinVideoAccountInfo(page, context, cookies);
        default:
          return this.getDefaultAccountInfo(platform);
      }
    } catch (error) {
      logger.error('HeadlessBrowser fetchAccountInfo error:', error);
      return this.getDefaultAccountInfo(platform);
    } finally {
      try {
        await browser.close();
      } catch { /* ignore */ }
    }
  }
  747. /**
  748. * 获取抖音账号信息 - 通过 API 方式获取
  749. * 1. 监听 check/user 接口验证登录状态
  750. * 2. 通过 work_list API 获取作品数和作品列表
  751. */
  752. private async fetchDouyinAccountInfo(
  753. page: Page,
  754. _context: BrowserContext,
  755. cookies: CookieData[]
  756. ): Promise<AccountInfo> {
  757. let accountId = `douyin_${Date.now()}`;
  758. let accountName = '抖音账号';
  759. let avatarUrl = '';
  760. let fansCount: number | undefined;
  761. let worksCount = 0;
  762. let worksList: WorkItem[] = [];
  763. let isLoggedIn = false;
  764. // 用于存储从 API 捕获的数据
  765. const capturedData: {
  766. userInfo?: {
  767. nickname?: string;
  768. avatar?: string;
  769. uid?: string;
  770. sec_uid?: string;
  771. unique_id?: string; // 抖音号(如 Ethanfly9392)
  772. short_id?: string; // 短ID
  773. follower_count?: number;
  774. };
  775. dataOverview?: {
  776. fans_count?: number;
  777. total_works?: number;
  778. total_play?: number;
  779. };
  780. worksList?: Array<{
  781. awemeId: string;
  782. title: string;
  783. coverUrl: string;
  784. duration: number;
  785. createTime: number;
  786. statistics: { play_count: number; digg_count: number; comment_count: number; share_count: number; collect_count: number };
  787. }>;
  788. total?: number;
  789. } = {};
  790. // 设置 API 响应监听器
  791. const responseHandler = async (response: import('playwright').Response) => {
  792. const url = response.url();
  793. try {
  794. // 监听 check/user 接口 - 验证登录状态
  795. if (url.includes(DOUYIN_API.CHECK_USER)) {
  796. const data = await response.json();
  797. isLoggedIn = data?.result === true && data?.status_code === 0;
  798. logger.info(`[Douyin API] check/user: isLoggedIn=${isLoggedIn}`);
  799. }
  800. // 监听 work_list 接口 - 获取作品列表
  801. if (url.includes('/work_list') || url.includes('/janus/douyin/creator/pc/work_list')) {
  802. const data = await response.json();
  803. if (data?.aweme_list && data.aweme_list.length > 0) {
  804. // 优先从 author.aweme_count 获取真实的作品数(最准确)
  805. const firstAweme = data.aweme_list[0];
  806. const authorAwemeCount = firstAweme?.author?.aweme_count;
  807. if (authorAwemeCount !== undefined && authorAwemeCount > 0) {
  808. capturedData.total = authorAwemeCount;
  809. logger.info(`[Douyin API] Using author.aweme_count as works count: ${authorAwemeCount}`);
  810. } else {
  811. // 备用方案:使用 items 数组长度
  812. const itemsCount = data?.items?.length || 0;
  813. if (itemsCount > 0) {
  814. capturedData.total = (capturedData.total || 0) + itemsCount;
  815. } else {
  816. // 如果没有 items,使用 aweme_list 长度
  817. capturedData.total = (capturedData.total || 0) + data.aweme_list.length;
  818. }
  819. }
  820. // 解析作品列表;video_url 使用 video.play_addr.url_list 的第一项
  821. capturedData.worksList = data.aweme_list.map((aweme: Record<string, unknown>) => {
  822. const statistics = aweme.statistics as Record<string, unknown> || {};
  823. const cover = aweme.Cover as { url_list?: string[] } || aweme.video as { cover?: { url_list?: string[] } };
  824. const coverUrl = cover?.url_list?.[0] || (cover as { cover?: { url_list?: string[] } })?.cover?.url_list?.[0] || '';
  825. const video = aweme.video as { play_addr?: { url_list?: string[] } } | undefined;
  826. const videoUrl = video?.play_addr?.url_list?.[0] || '';
  827. return {
  828. awemeId: String(aweme.aweme_id || ''),
  829. title: String(aweme.item_title || aweme.desc || '').split('\n')[0].slice(0, 50) || '无标题',
  830. coverUrl,
  831. videoUrl,
  832. duration: Number(aweme.duration || 0),
  833. createTime: Number(aweme.create_time || 0),
  834. statistics: {
  835. play_count: Number(statistics.play_count || 0),
  836. digg_count: Number(statistics.digg_count || 0),
  837. comment_count: Number(statistics.comment_count || 0),
  838. share_count: Number(statistics.share_count || 0),
  839. collect_count: Number((statistics as any).collect_count || 0),
  840. },
  841. };
  842. });
  843. logger.info(`[Douyin API] work_list: itemsCount=${capturedData.total}, aweme_list_length=${capturedData.worksList?.length}`);
  844. }
  845. }
  846. // 监听账号信息接口 - 增加更多可能的接口
  847. if (url.includes('/account_base_info') ||
  848. url.includes('/user/info') ||
  849. url.includes('/creator/user') ||
  850. url.includes('/data/overview') ||
  851. url.includes('/creator-micro/data') ||
  852. url.includes('/home_data')) {
  853. const data = await response.json();
  854. logger.info(`[Douyin API] Captured response from: ${url.split('?')[0]}`);
  855. // 处理 data/overview API - 获取总作品数
  856. if (url.includes('/data/overview') || url.includes('/creator-micro/data')) {
  857. if (data?.data) {
  858. capturedData.dataOverview = {
  859. fans_count: data.data.fans_count || data.data.follower_count,
  860. total_works: data.data.total_item_cnt || data.data.works_count || data.data.video_count,
  861. total_play: data.data.total_play_cnt,
  862. };
  863. logger.info(`[Douyin API] Captured data overview: total_works=${capturedData.dataOverview.total_works}, fans_count=${capturedData.dataOverview.fans_count}`);
  864. }
  865. }
  866. // 尝试多种数据结构
  867. const user = data?.user || data?.data?.user || data?.data || data;
  868. if (user) {
  869. const nickname = user.nickname || user.name || user.nick_name || user.user_name;
  870. const avatar = user.avatar_url || user.avatar_thumb?.url_list?.[0] || user.avatar || user.avatar_larger?.url_list?.[0];
  871. const uid = user.uid || user.user_id || user.id;
  872. const fans = user.follower_count || user.fans_count || user.mplatform_followers_count;
  873. // 获取抖音号(unique_id 或 short_id)
  874. const uniqueId = user.unique_id || user.short_id || user.douyin_id;
  875. if (nickname || uid || uniqueId) {
  876. capturedData.userInfo = {
  877. nickname: nickname,
  878. avatar: avatar,
  879. uid: uid,
  880. sec_uid: user.sec_uid,
  881. unique_id: uniqueId,
  882. short_id: user.short_id,
  883. follower_count: fans,
  884. };
  885. logger.info(`[Douyin API] user info captured: nickname=${capturedData.userInfo.nickname}, uid=${capturedData.userInfo.uid}, unique_id=${capturedData.userInfo.unique_id}`);
  886. }
  887. }
  888. }
  889. } catch (e) {
  890. // 忽略非 JSON 响应
  891. }
  892. };
  893. try {
  894. // 从 Cookie 获取用户 ID
  895. const uidCookie = cookies.find(c =>
  896. ['passport_uid', 'uid', 'ssid'].includes(c.name)
  897. );
  898. if (uidCookie?.value) {
  899. accountId = `douyin_${uidCookie.value}`;
  900. }
  901. // 绑定监听器
  902. page.on('response', responseHandler);
  903. // 访问主页获取基本信息并触发 check/user 接口
  904. logger.info('[Douyin] Navigating to creator home...');
  905. await page.goto(DOUYIN_API.CREATOR_HOME, {
  906. waitUntil: 'domcontentloaded',
  907. timeout: 30000,
  908. });
  909. // 等待页面加载完成
  910. await page.waitForTimeout(3000);
  911. // 尝试等待网络空闲
  912. try {
  913. await page.waitForLoadState('networkidle', { timeout: 10000 });
  914. } catch {
  915. // 超时继续
  916. }
  917. // 额外等待确保 API 响应被捕获
  918. await page.waitForTimeout(2000);
  919. // 访问数据中心页面,触发 data/overview API
  920. logger.info('[Douyin] Navigating to data center to trigger data/overview API...');
  921. try {
  922. await page.goto('https://creator.douyin.com/creator-micro/data-center/operation', {
  923. waitUntil: 'domcontentloaded',
  924. timeout: 15000,
  925. });
  926. await page.waitForTimeout(3000);
  927. // 检查是否获取到 dataOverview
  928. if (capturedData.dataOverview?.total_works) {
  929. logger.info(`[Douyin] Captured dataOverview from data center: total_works=${capturedData.dataOverview.total_works}`);
  930. } else {
  931. logger.warn('[Douyin] Failed to capture dataOverview from data center');
  932. }
  933. } catch (error) {
  934. logger.warn('[Douyin] Failed to navigate to data center:', error);
  935. }
  936. // #6088: 如果还没有获取到作品列表,主动访问内容管理页面触发 work_list API
  937. if (!capturedData.worksList || capturedData.worksList.length === 0) {
  938. logger.info('[Douyin] No works captured yet, navigating to content manage page to trigger work_list API...');
  939. try {
  940. await page.goto('https://creator.douyin.com/creator-micro/content/manage', {
  941. waitUntil: 'domcontentloaded',
  942. timeout: 15000,
  943. });
  944. await page.waitForTimeout(5000);
  945. if (capturedData.worksList && capturedData.worksList.length > 0) {
  946. logger.info(`[Douyin] Captured ${capturedData.worksList.length} works from content manage page`);
  947. } else {
  948. logger.warn('[Douyin] Still no works captured from content manage page');
  949. }
  950. } catch (error) {
  951. logger.warn('[Douyin] Failed to navigate to content manage page:', error);
  952. }
  953. }
  954. // 检查登录状态 - 如果没有从 API 获取到,通过 URL 判断
  955. if (!isLoggedIn) {
  956. const currentUrl = page.url();
  957. isLoggedIn = !currentUrl.includes('login') && !currentUrl.includes('passport');
  958. }
  959. if (!isLoggedIn) {
  960. logger.warn('[Douyin] Not logged in, returning default account info');
  961. return { accountId, accountName, avatarUrl, fansCount, worksCount, worksList };
  962. }
  963. // 从页面提取基本账号信息(作为 API 数据的补充)
  964. const accountData = await page.evaluate(() => {
  965. const result: { name?: string; avatar?: string; fans?: number; douyinId?: string } = {};
  966. // 提取抖音号 - 多种方式
  967. // 方式1:通过选择器查找包含抖音号的元素
  968. const uniqueIdSelectors = [
  969. 'div[class*="unique"]',
  970. 'span[class*="unique"]',
  971. 'div[class*="douyin-id"]',
  972. 'span[class*="douyin-id"]',
  973. '[class*="account-id"]',
  974. '[class*="shortId"]',
  975. '[class*="short-id"]',
  976. ];
  977. for (const selector of uniqueIdSelectors) {
  978. const el = document.querySelector(selector);
  979. if (el) {
  980. const text = el.textContent?.trim() || '';
  981. const match = text.match(/抖音号[::]\s*(\S+)/) || text.match(/ID[::]\s*(\S+)/);
  982. if (match) {
  983. result.douyinId = match[1];
  984. break;
  985. }
  986. // 如果元素文本本身就是抖音号(无前缀)
  987. if (text && !text.includes('抖音号') && /^[a-zA-Z0-9_]+$/.test(text)) {
  988. result.douyinId = text;
  989. break;
  990. }
  991. }
  992. }
  993. // 方式2:全局搜索包含"抖音号"的文本
  994. if (!result.douyinId) {
  995. const allElements = Array.from(document.querySelectorAll('span, div, p'));
  996. for (const el of allElements) {
  997. const text = el.textContent?.trim() || '';
  998. // 匹配 "抖音号:xxx" 或 "抖音号: xxx"
  999. const match = text.match(/抖音号[::]\s*([a-zA-Z0-9_]+)/);
  1000. if (match && match[1]) {
  1001. result.douyinId = match[1];
  1002. break;
  1003. }
  1004. }
  1005. }
  1006. // 查找头像 - 优先使用头像容器
  1007. const avatarSelectors = [
  1008. '[class*="avatar"] img',
  1009. '[class*="user-avatar"] img',
  1010. '[class*="profile"] img',
  1011. 'img[class*="avatar"]',
  1012. ];
  1013. for (const selector of avatarSelectors) {
  1014. const img = document.querySelector(selector) as HTMLImageElement;
  1015. if (img?.src && (img.src.includes('aweme') || img.src.includes('douyinpic') || img.src.includes('bytedance'))) {
  1016. result.avatar = img.src;
  1017. break;
  1018. }
  1019. }
  1020. // 备用方案:查找所有图片
  1021. if (!result.avatar) {
  1022. const avatarImgs = Array.from(document.querySelectorAll('img'));
  1023. for (const img of avatarImgs) {
  1024. const src = img.src || img.getAttribute('src') || '';
  1025. if (src && (src.includes('aweme') || src.includes('douyinpic') || src.includes('bytedance'))) {
  1026. const rect = img.getBoundingClientRect();
  1027. if (rect.width > 30 && rect.width < 150 && rect.top < 300) {
  1028. result.avatar = src;
  1029. break;
  1030. }
  1031. }
  1032. }
  1033. }
  1034. // 获取粉丝数 - 多种选择器
  1035. const fansSelectors = [
  1036. '#guide_home_fans',
  1037. '[class*="fans"]',
  1038. '[class*="follower"]',
  1039. '[class*="data-item"]',
  1040. ];
  1041. for (const selector of fansSelectors) {
  1042. const el = document.querySelector(selector);
  1043. if (el) {
  1044. const text = el.textContent?.trim() || '';
  1045. // 匹配 "123" 或 "1.2万" 或 "粉丝 123"
  1046. const match = text.match(/(\d+(?:\.\d+)?)\s*([万wW])?/);
  1047. if (match) {
  1048. let num = parseFloat(match[1]);
  1049. if (match[2]) num *= 10000;
  1050. result.fans = Math.floor(num);
  1051. break;
  1052. }
  1053. }
  1054. }
  1055. // 查找用户名 - 更精确的选择器
  1056. const nameSelectors = [
  1057. '[class*="user-name"]',
  1058. '[class*="nickname"]',
  1059. '[class*="author-name"]',
  1060. '[class*="profile-name"]',
  1061. 'h1[class*="name"]',
  1062. 'h2[class*="name"]',
  1063. ];
  1064. for (const selector of nameSelectors) {
  1065. const el = document.querySelector(selector);
  1066. if (el) {
  1067. const text = el.textContent?.trim() || '';
  1068. if (text && text.length >= 2 && text.length <= 30) {
  1069. result.name = text;
  1070. break;
  1071. }
  1072. }
  1073. }
  1074. // 备用方案:查找包含名字的容器
  1075. if (!result.name) {
  1076. const nameContainers = Array.from(document.querySelectorAll('[class*="name"], [class*="nick"], [class*="user"]'));
  1077. for (const container of nameContainers) {
  1078. const text = container.textContent?.trim() || '';
  1079. if (text &&
  1080. text.length >= 2 &&
  1081. text.length <= 20 &&
  1082. !text.includes('关注') &&
  1083. !text.includes('粉丝') &&
  1084. !text.includes('获赞') &&
  1085. !text.includes('加载') &&
  1086. !text.includes('创作') &&
  1087. !text.includes('发布') &&
  1088. !text.includes('抖音号') &&
  1089. !text.match(/^\d+$/)) {
  1090. const rect = container.getBoundingClientRect();
  1091. if (rect.top < 400 && rect.left < 500) {
  1092. result.name = text;
  1093. break;
  1094. }
  1095. }
  1096. }
  1097. }
  1098. return result;
  1099. });
  1100. // 优先使用抖音号作为 ID(unique_id),其次是页面提取的抖音号,最后是 uid
  1101. if (capturedData.userInfo?.unique_id) {
  1102. // 优先使用抖音号(如 Ethanfly9392)
  1103. accountId = `douyin_${capturedData.userInfo.unique_id}`;
  1104. logger.info(`[Douyin] Using unique_id as accountId: ${accountId}`);
  1105. } else if (capturedData.userInfo?.short_id) {
  1106. // 其次使用短ID
  1107. accountId = `douyin_${capturedData.userInfo.short_id}`;
  1108. logger.info(`[Douyin] Using short_id as accountId: ${accountId}`);
  1109. } else if (accountData.douyinId) {
  1110. // 使用页面提取的抖音号
  1111. accountId = `douyin_${accountData.douyinId}`;
  1112. logger.info(`[Douyin] Using page douyinId as accountId: ${accountId}`);
  1113. } else if (capturedData.userInfo?.uid) {
  1114. // 最后使用内部uid
  1115. accountId = `douyin_${capturedData.userInfo.uid}`;
  1116. logger.info(`[Douyin] Using uid as accountId: ${accountId}`);
  1117. }
  1118. accountName = capturedData.userInfo?.nickname || accountData.name || accountName;
  1119. avatarUrl = capturedData.userInfo?.avatar || accountData.avatar || avatarUrl;
  1120. fansCount = capturedData.userInfo?.follower_count || accountData.fans || fansCount;
  1121. // 优先从 dataOverview 获取作品数(最准确)
  1122. if (capturedData.dataOverview?.total_works && capturedData.dataOverview.total_works > 0) {
  1123. worksCount = capturedData.dataOverview.total_works;
  1124. logger.info(`[Douyin] Using dataOverview.total_works as works count: ${worksCount}`);
  1125. }
  1126. // 通过 API 获取作品列表
  1127. logger.info('[Douyin] Fetching works via API...');
  1128. const apiResult = await this.fetchWorksDirectApi(page);
  1129. logger.info(`[Douyin] fetchWorksDirectApi returned: works.length=${apiResult.works.length}, total=${apiResult.total}`);
  1130. if (apiResult.works.length > 0) {
  1131. // 如果之前从 dataOverview 获取到了作品数,优先使用它
  1132. if (worksCount === 0) {
  1133. // 使用 API 返回的总数,如果为 0 则使用实际获取到的作品列表长度
  1134. worksCount = apiResult.total > 0 ? apiResult.total : apiResult.works.length;
  1135. logger.info(`[Douyin] Using API result as works count: ${worksCount} (total=${apiResult.total}, works.length=${apiResult.works.length})`);
  1136. } else {
  1137. logger.info(`[Douyin] Already have works count from dataOverview: ${worksCount}, skipping API result`);
  1138. }
  1139. worksList = apiResult.works.map(w => ({
  1140. videoId: w.awemeId,
  1141. title: w.title,
  1142. coverUrl: w.coverUrl,
  1143. videoUrl: (w as { videoUrl?: string }).videoUrl || (w.awemeId ? `https://www.douyin.com/video/${w.awemeId}` : ''),
  1144. duration: '00:00',
  1145. publishTime: w.createTime ? new Date(w.createTime * 1000).toISOString() : '',
  1146. status: 'published',
  1147. playCount: w.playCount,
  1148. likeCount: w.likeCount,
  1149. commentCount: w.commentCount,
  1150. shareCount: w.shareCount,
  1151. collectCount: w.collectCount,
  1152. }));
  1153. logger.info(`[Douyin] Got ${apiResult.works.length} works from API, total count: ${worksCount}`);
  1154. } else {
  1155. logger.warn(`[Douyin] fetchWorksDirectApi returned 0 works`);
  1156. if (capturedData.worksList && capturedData.worksList.length > 0) {
  1157. // 如果之前从 dataOverview 获取到了作品数,优先使用它
  1158. if (worksCount === 0) {
  1159. // 如果直接 API 调用失败,使用监听到的数据
  1160. logger.info(`[Douyin] Falling back to intercepted API data: ${capturedData.worksList.length} works`);
  1161. worksCount = capturedData.total || capturedData.worksList.length;
  1162. } else {
  1163. logger.info(`[Douyin] Already have works count from dataOverview: ${worksCount}, skipping intercepted data`);
  1164. }
  1165. worksList = capturedData.worksList.map(w => ({
  1166. videoId: w.awemeId,
  1167. title: w.title,
  1168. coverUrl: w.coverUrl,
  1169. videoUrl: (w as { videoUrl?: string }).videoUrl || (w.awemeId ? `https://www.douyin.com/video/${w.awemeId}` : ''),
  1170. duration: this.formatDuration(w.duration),
  1171. publishTime: w.createTime ? new Date(w.createTime * 1000).toISOString() : '',
  1172. status: 'published',
  1173. playCount: w.statistics.play_count,
  1174. likeCount: w.statistics.digg_count,
  1175. commentCount: w.statistics.comment_count,
  1176. shareCount: w.statistics.share_count,
  1177. collectCount: w.statistics.collect_count,
  1178. }));
  1179. logger.info(`[Douyin] Got ${worksCount} works from intercepted API data`);
  1180. } else {
  1181. logger.warn(`[Douyin] No works found: fetchWorksDirectApi returned 0, intercepted data also empty`);
  1182. logger.warn(`[Douyin] This may indicate: cookie expired, API error, or account has no works`);
  1183. }
  1184. }
  1185. } catch (error) {
  1186. logger.error('Failed to fetch Douyin account info:', error);
  1187. logger.error('Error details:', error instanceof Error ? error.stack : String(error));
  1188. } finally {
  1189. // 移除监听器防止内存泄漏
  1190. page.off('response', responseHandler);
  1191. }
  1192. return { accountId, accountName, avatarUrl, fansCount, worksCount, worksList };
  1193. }
  /**
   * Format a video duration given in milliseconds as a zero-padded `MM:SS` string.
   *
   * Minutes are not wrapped into hours, so a duration of one hour or more yields
   * a larger minute field (for example `75:00`).
   *
   * @param ms Duration in milliseconds.
   * @returns `MM:SS`, or `'00:00'` when `ms` is missing, zero, negative or not finite.
   */
  private formatDuration(ms: number): string {
    // Coerce first so that numeric strings coming from untyped payloads keep working.
    const value = Number(ms);
    // Negative and non-finite inputs used to render as "-1:-1" / "Infinity:NaN".
    if (!Number.isFinite(value) || value <= 0) return '00:00';

    const totalSeconds = Math.floor(value / 1000);
    const minutes = Math.floor(totalSeconds / 60);
    const seconds = totalSeconds % 60;
    return `${minutes.toString().padStart(2, '0')}:${seconds.toString().padStart(2, '0')}`;
  }
  /**
   * Fetch Bilibili account information.
   *
   * The account ID comes from the `DedeUserID` cookie; the display name and the
   * avatar are scraped from the creator-center home page. Any failure is logged
   * as a warning and the values gathered so far (defaults otherwise) are returned.
   *
   * @param page Page used to open and query the creator-center home page.
   * @param _context Unused.
   * @param cookies Session cookies; only `DedeUserID` is read.
   * @returns Account info; `fansCount` is always `undefined` and `worksCount` always `0` here.
   */
  private async fetchBilibiliAccountInfo(
    page: Page,
    _context: BrowserContext,
    cookies: CookieData[]
  ): Promise<AccountInfo> {
    // Defaults, returned unchanged whenever the corresponding lookup fails.
    let accountId = `bilibili_${Date.now()}`;
    let accountName = 'B站账号';
    let avatarUrl = '';
    const fansCount: number | undefined = undefined;
    const worksCount = 0;

    try {
      const dedeUserId = cookies.find((cookie) => cookie.name === 'DedeUserID')?.value;
      if (dedeUserId) {
        accountId = `bilibili_${dedeUserId}`;
      }

      await page.goto('https://member.bilibili.com/platform/home', {
        waitUntil: 'domcontentloaded',
        timeout: 30000,
      });
      await page.waitForTimeout(3000);

      // Display name
      const nicknameHandle = await page.$('[class*="nickname"], .user-name, .uname');
      const nickname = nicknameHandle ? (await nicknameHandle.textContent())?.trim() : undefined;
      if (nickname) {
        accountName = nickname;
      }

      // Avatar
      const avatarHandle = await page.$('[class*="avatar"] img, .user-face img');
      const avatarSrc = avatarHandle ? await avatarHandle.getAttribute('src') : null;
      if (avatarSrc) {
        avatarUrl = avatarSrc;
      }
    } catch (error) {
      logger.warn('Failed to fetch Bilibili account info:', error);
    }

    return { accountId, accountName, avatarUrl, fansCount, worksCount };
  }
  /**
   * Fetch Kuaishou account information.
   *
   * The account ID is derived from cookies (at most the first 20 characters of the
   * cookie value are kept); the display name and the avatar are scraped from the
   * creator profile page. Any failure is logged as a warning and the values
   * gathered so far (defaults otherwise) are returned.
   *
   * @param page Page used to open and query the creator profile page.
   * @param _context Unused.
   * @param cookies Session cookies; `userId` and `kuaishou.server.web_st` are read.
   * @returns Account info; `fansCount` is always `undefined` and `worksCount` always `0` here.
   */
  private async fetchKuaishouAccountInfo(
    page: Page,
    _context: BrowserContext,
    cookies: CookieData[]
  ): Promise<AccountInfo> {
    let accountId = `kuaishou_${Date.now()}`;
    let accountName = '快手账号';
    let avatarUrl = '';
    const fansCount: number | undefined = undefined;
    const worksCount = 0;

    try {
      // Prefer `userId` explicitly. A single find() over both names returned
      // whichever cookie came first in the array, so the resulting account ID
      // depended on cookie order.
      // NOTE(review): `kuaishou.server.web_st` looks like a session token rather
      // than a stable user ID — confirm; it is kept only as a fallback.
      const userIdValue = cookies.find((cookie) => cookie.name === 'userId')?.value;
      const fallbackValue = cookies.find((cookie) => cookie.name === 'kuaishou.server.web_st')?.value;
      const identity = userIdValue || fallbackValue;
      if (identity) {
        accountId = `kuaishou_${identity.slice(0, 20)}`;
      }

      await page.goto('https://cp.kuaishou.com/profile', {
        waitUntil: 'domcontentloaded',
        timeout: 30000,
      });
      await page.waitForTimeout(3000);

      // Display name
      const nameEl = await page.$('[class*="nickname"], [class*="userName"]');
      if (nameEl) {
        const text = (await nameEl.textContent())?.trim();
        if (text) {
          accountName = text;
        }
      }

      // Avatar
      const avatarEl = await page.$('[class*="avatar"] img');
      if (avatarEl) {
        const src = await avatarEl.getAttribute('src');
        if (src) {
          avatarUrl = src;
        }
      }
    } catch (error) {
      logger.warn('Failed to fetch Kuaishou account info:', error);
    }

    return { accountId, accountName, avatarUrl, fansCount, worksCount };
  }
  /**
   * Fetch WeChat Channels (视频号) account information.
   *
   * Steps, in order:
   *  1. Derive a provisional account ID from cookies (`finder_username`, else `wxuin`/`uin`).
   *  2. Open the creator home page and scrape finder ID, name, avatar, follower
   *     count and works count. A finder ID found on the page replaces the cookie one.
   *  3. If no finder ID of at least 10 characters is known yet, try the account
   *     settings page.
   *  4. Load the works list through `fetchWeixinVideoWorksList`.
   *
   * Failures are logged as warnings and never thrown: the method resolves with
   * whatever was collected up to that point. When the home page redirects to a
   * login/passport URL it returns early with the cookie-derived values.
   *
   * @param page Page used for navigation and scraping.
   * @param _context Unused.
   * @param cookies Session cookies used to derive the provisional account ID.
   * @returns The collected account info including the works list and its completeness flag.
   */
  private async fetchWeixinVideoAccountInfo(
    page: Page,
    _context: BrowserContext,
    cookies: CookieData[]
  ): Promise<AccountInfo> {
    let accountId = `weixin_video_${Date.now()}`;
    let accountName = '视频号账号';
    let avatarUrl = '';
    let fansCount: number | undefined;
    let worksCount = 0;
    let finderId = '';
    let worksList: WorkItem[] = [];
    let worksListComplete = false;

    try {
      // Derive a user identifier from cookies.
      // Preferred: finder_username (the unique Channels identifier).
      const finderUsernameCookie = cookies.find(c => c.name === 'finder_username');
      if (finderUsernameCookie?.value) {
        finderId = finderUsernameCookie.value;
        accountId = `weixin_video_${finderId}`;
        logger.info(`[WeixinVideo] Found finder_username from cookie: ${finderId}`);
      }

      // Fallback: wxuin or uin.
      if (!finderId) {
        const uinCookie = cookies.find(c => c.name === 'wxuin' || c.name === 'uin');
        if (uinCookie?.value) {
          // Provisional ID only: finderId stays empty, so the settings-page lookup
          // below still runs when the home page yields no finder ID.
          accountId = `weixin_video_${uinCookie.value}`;
          logger.info(`[WeixinVideo] Using uin from cookie: ${uinCookie.value}`);
        }
      }

      // Open the creator platform home page.
      await page.goto('https://channels.weixin.qq.com/platform/home', {
        waitUntil: 'domcontentloaded',
        timeout: 30000,
      });
      await page.waitForTimeout(3000);

      // A redirect to a login/passport URL means the session is no longer valid.
      const currentUrl = page.url();
      if (currentUrl.includes('login') || currentUrl.includes('passport')) {
        logger.warn('[WeixinVideo] Cookie expired, needs login');
        return { accountId, accountName, avatarUrl, fansCount, worksCount, worksList };
      }

      // Scrape account data from the page (this callback runs in the browser context).
      const accountData = await page.evaluate(() => {
        const result: { name?: string; avatar?: string; fans?: number; works?: number; finderId?: string } = {};
        try {
          // ===== 1. Finder ID, most precise selectors first =====
          // Method 1: data-clipboard-text attribute of #finder-uid-copy
          const finderIdCopyEl = document.querySelector('#finder-uid-copy');
          if (finderIdCopyEl) {
            const clipboardText = finderIdCopyEl.getAttribute('data-clipboard-text');
            if (clipboardText && clipboardText.length >= 10) {
              result.finderId = clipboardText;
              console.log('[WeixinVideo] Found finder ID from data-clipboard-text:', result.finderId);
            } else {
              // Fallback: the element's text content
              const text = finderIdCopyEl.textContent?.trim();
              if (text && /^[a-zA-Z0-9_]+$/.test(text) && text.length >= 10) {
                result.finderId = text;
                console.log('[WeixinVideo] Found finder ID from #finder-uid-copy text:', result.finderId);
              }
            }
          }

          // Method 2: the .finder-uniq-id element
          if (!result.finderId) {
            const finderUniqIdEl = document.querySelector('.finder-uniq-id');
            if (finderUniqIdEl) {
              const clipboardText = finderUniqIdEl.getAttribute('data-clipboard-text');
              if (clipboardText && clipboardText.length >= 10) {
                result.finderId = clipboardText;
                console.log('[WeixinVideo] Found finder ID from .finder-uniq-id data-clipboard-text:', result.finderId);
              } else {
                const text = finderUniqIdEl.textContent?.trim();
                if (text && /^[a-zA-Z0-9_]+$/.test(text) && text.length >= 10) {
                  result.finderId = text;
                  console.log('[WeixinVideo] Found finder ID from .finder-uniq-id text:', result.finderId);
                }
              }
            }
          }

          // Method 3: regex over the page's visible text
          if (!result.finderId) {
            const bodyText = document.body.innerText || '';
            const finderIdPatterns = [
              /视频号ID[::\s]*([a-zA-Z0-9_]+)/,
              /视频号[::\s]*ID[::\s]*([a-zA-Z0-9_]+)/,
            ];
            for (const pattern of finderIdPatterns) {
              const match = bodyText.match(pattern);
              if (match && match[1] && match[1].length >= 10) {
                result.finderId = match[1];
                console.log('[WeixinVideo] Found finder ID from regex:', result.finderId);
                break;
              }
            }
          }

          // ===== 2. Account name =====
          // Preferred: h2.finder-nickname
          const nicknameEl = document.querySelector('h2.finder-nickname') ||
            document.querySelector('.finder-nickname');
          if (nicknameEl) {
            const text = nicknameEl.textContent?.trim();
            if (text && text.length >= 2 && text.length <= 30) {
              result.name = text;
              console.log('[WeixinVideo] Found name from .finder-nickname:', result.name);
            }
          }

          // Fallback selectors
          if (!result.name) {
            const nameSelectors = [
              '.account-name',
              '[class*="nickname"]',
              '[class*="userName"]',
            ];
            for (const selector of nameSelectors) {
              const el = document.querySelector(selector);
              const text = el?.textContent?.trim();
              if (text && text.length >= 2 && text.length <= 30) {
                result.name = text;
                console.log('[WeixinVideo] Found name from selector:', selector, result.name);
                break;
              }
            }
          }

          // ===== 3. Avatar =====
          // Preferred: img.avatar
          const avatarEl = document.querySelector('img.avatar') as HTMLImageElement;
          if (avatarEl?.src && avatarEl.src.startsWith('http')) {
            result.avatar = avatarEl.src;
            console.log('[WeixinVideo] Found avatar from img.avatar:', result.avatar);
          }

          // Fallback selectors
          if (!result.avatar) {
            const avatarSelectors = [
              '.finder-info-container img.avatar',
              'img[alt="视频号头像"]',
              'img[src*="wx.qlogo.cn/finderhead"]',
              'img[src*="wx.qlogo"]',
            ];
            for (const selector of avatarSelectors) {
              const el = document.querySelector(selector) as HTMLImageElement;
              if (el?.src && el.src.startsWith('http')) {
                result.avatar = el.src;
                console.log('[WeixinVideo] Found avatar from selector:', selector);
                break;
              }
            }
          }

          // ===== 4. Works count and follower count =====
          // Read the .finder-info-num elements inside .finder-content-info
          const contentInfo = document.querySelector('.finder-content-info');
          if (contentInfo) {
            const infoDivs = contentInfo.querySelectorAll('div');
            infoDivs.forEach(div => {
              const text = div.textContent || '';
              const numEl = div.querySelector('.finder-info-num');
              if (!numEl) return;

              // parseInt() alone turned "1.2万" into 1, "1,234" into 1 and "--" into NaN.
              // Parse the number with its optional 万/w unit instead; when the text
              // is not numeric, leave the field unset so the text fallback below can run.
              const rawNum = (numEl.textContent?.trim() || '0').replace(/,/g, '');
              const numMatch = rawNum.match(/^(\d+(?:\.\d+)?)\s*([万wW])?/);
              if (!numMatch) return;
              const num = numMatch[2]
                ? Math.round(parseFloat(numMatch[1]) * 10000)
                : Math.floor(parseFloat(numMatch[1]));

              if (text.includes('视频') || text.includes('作品')) {
                result.works = num;
                console.log('[WeixinVideo] Found works from .finder-info-num:', result.works);
              } else if (text.includes('关注者') || text.includes('粉丝')) {
                result.fans = num;
                console.log('[WeixinVideo] Found fans from .finder-info-num:', result.fans);
              }
            });
          }

          // Fallback: match against the page's visible text
          if (result.fans === undefined || result.works === undefined) {
            const bodyText = document.body.innerText || '';
            if (result.fans === undefined) {
              const fansMatch = bodyText.match(/关注者\s*(\d+(?:\.\d+)?[万wW]?)/) ||
                bodyText.match(/粉丝\s*(\d+(?:\.\d+)?[万wW]?)/);
              if (fansMatch) {
                let count = parseFloat(fansMatch[1]);
                if (fansMatch[1].includes('万') || fansMatch[1].toLowerCase().includes('w')) {
                  count = count * 10000;
                }
                result.fans = Math.floor(count);
                console.log('[WeixinVideo] Found fans from text:', result.fans);
              }
            }
            if (result.works === undefined) {
              const worksMatch = bodyText.match(/视频\s*(\d+)/) ||
                bodyText.match(/作品\s*(\d+)/);
              if (worksMatch) {
                result.works = parseInt(worksMatch[1], 10);
                console.log('[WeixinVideo] Found works from text:', result.works);
              }
            }
          }
        } catch (e) {
          console.error('[WeixinVideo] Extract error:', e);
        }
        return result;
      });

      logger.info(`[WeixinVideo] Extracted account data from home page:`, accountData);

      // Merge the scraped values over the defaults.
      if (accountData.name) {
        accountName = accountData.name;
      }
      if (accountData.avatar) {
        avatarUrl = accountData.avatar;
      }
      if (accountData.fans !== undefined) {
        fansCount = accountData.fans;
      }
      if (accountData.works !== undefined) {
        worksCount = accountData.works;
      }
      if (accountData.finderId) {
        finderId = accountData.finderId;
        accountId = `weixin_video_${accountData.finderId}`;
      }

      // No usable finder ID yet: try the account settings page.
      if (!finderId || finderId.length < 10) {
        logger.info('[WeixinVideo] Finder ID not found on home page, trying account settings page...');
        try {
          await page.goto('https://channels.weixin.qq.com/platform/account', {
            waitUntil: 'domcontentloaded',
            timeout: 30000,
          });
          await page.waitForTimeout(2000);

          // Extract the finder ID from the settings page (runs in the browser context).
          const settingsData = await page.evaluate(() => {
            const result: { finderId?: string; name?: string } = {};
            const bodyText = document.body.innerText || '';

            // Try several label patterns against the visible text.
            const patterns = [
              /视频号ID[::\s]*([a-zA-Z0-9_]+)/,
              /视频号[::\s]*ID[::\s]*([a-zA-Z0-9_]+)/,
              /视频号[::\s]+([a-zA-Z0-9_]{10,})/,
              /Finder\s*ID[::\s]*([a-zA-Z0-9_]+)/i,
              /finder_username[::\s]*([a-zA-Z0-9_]+)/i,
              /唯一标识[::\s]*([a-zA-Z0-9_]+)/,
            ];
            for (const pattern of patterns) {
              const match = bodyText.match(pattern);
              if (match && match[1]) {
                result.finderId = match[1];
                console.log('[WeixinVideo] Found finder ID from settings page:', result.finderId);
                break;
              }
            }

            // Otherwise look for an element whose text looks like an ID.
            if (!result.finderId) {
              const idSelectors = [
                '[class*="finder-id"]',
                '[class*="account-id"]',
                '[class*="unique-id"]',
                '.finder-uniq-id',
                'span.finder-uniq-id',
                '[class*="copy-id"]',
              ];
              for (const selector of idSelectors) {
                const el = document.querySelector(selector);
                const text = el?.textContent?.trim();
                if (text && /^[a-zA-Z0-9_]+$/.test(text) && text.length >= 10 && text.length <= 30) {
                  result.finderId = text;
                  console.log('[WeixinVideo] Found ID from settings selector:', result.finderId);
                  break;
                }
              }
            }
            return result;
          });

          logger.info(`[WeixinVideo] Extracted data from settings page:`, settingsData);
          if (settingsData.finderId) {
            finderId = settingsData.finderId;
            accountId = `weixin_video_${settingsData.finderId}`;
          }
        } catch (settingsError) {
          logger.warn('[WeixinVideo] Failed to fetch from settings page:', settingsError);
        }
      }

      // Load the works list; a failure here keeps the account data gathered above.
      try {
        const worksResult = await this.fetchWeixinVideoWorksList(page, worksCount);
        worksList = worksResult.works;
        worksListComplete = worksResult.complete;
        if (worksResult.total > 0) {
          worksCount = worksResult.total;
        } else if (worksList.length > 0 && worksCount === 0) {
          worksCount = worksList.length;
        }
        logger.info(
          `[WeixinVideo] Works fetched: list=${worksList.length}, total=${worksCount}, withCover=${worksList.filter((work) => !!work.coverUrl).length}`
        );
      } catch (worksError) {
        logger.warn('[WeixinVideo] Failed to fetch works list:', worksError);
      }

      logger.info(`[WeixinVideo] Final account info: id=${accountId}, name=${accountName}, avatar=${avatarUrl ? 'yes' : 'no'}, fans=${fansCount}`);
    } catch (error) {
      logger.warn('Failed to fetch WeixinVideo account info:', error);
    }

    return { accountId, accountName, avatarUrl, fansCount, worksCount, worksList, worksListComplete };
  }
  /**
   * Collect the works (posts) of the logged-in WeChat Channels account.
   *
   * The method listens for `post_list` requests and responses while navigating to
   * the works-list pages and scrolling, then asks
   * `fetchRemainingWeixinVideoWorkPages` for further pages when the captured
   * response reports more data. If no API payload produced any work, it falls
   * back to scraping the DOM.
   *
   * @param page Page used for navigation; listeners are attached for the duration
   *   of the navigation phase and always removed afterwards.
   * @param declaredTotal Works count already known to the caller (0 if unknown).
   * @returns `works` (de-duplicated by video ID, or by title plus publish time when
   *   the ID is empty), `total` (never smaller than `works.length`) and `complete`.
   *   `complete` is only `true` when the works came from API payloads and either
   *   the count reached the reported total, the last payload reported no further
   *   pages, or no total is known.
   */
  private async fetchWeixinVideoWorksList(
    page: Page,
    declaredTotal = 0
  ): Promise<{ works: WorkItem[]; total: number; complete: boolean }> {
    const capturedWorks = new Map<string, WorkItem>();
    let total = declaredTotal || 0;
    let lastBuff = '';
    let continueFlag = false;
    // First observed post_list request; reused as the template for paging.
    let postListRequest: { url: string; body: Record<string, unknown> } | null = null;

    // Merge the posts of one API payload into capturedWorks and update paging state.
    const collectPosts = (payload: unknown, source: string) => {
      const posts = this.extractWeixinVideoPostList(payload);
      if (posts.length === 0) return;
      logger.info(`[WeixinVideo Works] Captured ${posts.length} posts from ${source}`);

      for (const post of posts) {
        const work = this.mapWeixinVideoPostToWorkItem(post);
        if (!work) continue;
        const key = work.videoId || `${work.title}_${work.publishTime}`;
        const existing = capturedWorks.get(key);
        if (!existing) {
          capturedWorks.set(key, work);
        } else {
          // Newer data wins, but never replace a known cover/video URL with an empty one.
          capturedWorks.set(key, {
            ...existing,
            ...work,
            coverUrl: work.coverUrl || existing.coverUrl,
            videoUrl: work.videoUrl || existing.videoUrl,
          });
        }
      }

      const maybeTotal = this.extractWeixinVideoTotal(payload);
      if (maybeTotal > 0) total = maybeTotal;
      const pagination = this.extractWeixinVideoPagination(payload);
      if (pagination.lastBuff) lastBuff = pagination.lastBuff;
      continueFlag = pagination.continueFlag;
    };

    const requestHandler = async (request: import('playwright').Request) => {
      const url = request.url();
      if (!url.includes('/mmfinderassistant-bin/post/post_list') || postListRequest) return;
      const body = this.parseJsonObject(request.postData());
      if (body) {
        postListRequest = { url, body };
      }
    };

    const responseHandler = async (response: import('playwright').Response) => {
      const url = response.url();
      if (!url.includes('/mmfinderassistant-bin/post/post_list')) return;
      try {
        const data = await response.json();
        collectPosts(data, url);
      } catch (error) {
        logger.warn(`[WeixinVideo Works] Failed to parse post_list response: ${url}`, error);
      }
    };

    page.on('request', requestHandler);
    page.on('response', responseHandler);

    try {
      const candidateUrls = [
        'https://channels.weixin.qq.com/platform/content/post/list',
        'https://channels.weixin.qq.com/micro/content/post/list',
      ];

      for (const url of candidateUrls) {
        try {
          logger.info(`[WeixinVideo Works] Navigating to ${url}`);
          await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
          await page.waitForTimeout(5000);

          const currentUrl = page.url();
          if (currentUrl.includes('login') || currentUrl.includes('passport')) {
            logger.warn(`[WeixinVideo Works] Login page detected after navigating to works list: ${currentUrl}`);
            break;
          }

          // Scroll the tallest scrollable element (or the window) a few times.
          for (let i = 0; i < 6; i += 1) {
            await page.evaluate(() => {
              const scrollables = Array.from(document.querySelectorAll<HTMLElement>('*'))
                .filter((element) => element.scrollHeight > element.clientHeight + 100);
              const target = scrollables.sort((a, b) => b.scrollHeight - a.scrollHeight)[0];
              if (target) {
                target.scrollTop = target.scrollHeight;
              } else {
                window.scrollBy(0, 800);
              }
            });
            await page.waitForTimeout(800);
          }

          if (capturedWorks.size > 0) break;
        } catch (error) {
          logger.warn(`[WeixinVideo Works] Failed to navigate ${url}:`, error);
        }
      }

      if (capturedWorks.size > 0 && postListRequest && continueFlag && lastBuff) {
        const pageWorks = await this.fetchRemainingWeixinVideoWorkPages({
          page,
          request: postListRequest,
          firstLastBuff: lastBuff,
          declaredTotal: total,
          existingCount: capturedWorks.size,
          collectPosts,
        });
        total = Math.max(total, pageWorks.total);
        continueFlag = pageWorks.continueFlag;
      }
    } finally {
      page.off('request', requestHandler);
      page.off('response', responseHandler);
    }

    // Up to this point capturedWorks is only ever filled from post_list payloads.
    const capturedFromApi = capturedWorks.size > 0;

    if (!capturedFromApi) {
      const domWorks = await this.extractWeixinVideoWorksFromDom(page);
      for (const work of domWorks) {
        const key = work.videoId || `${work.title}_${work.publishTime}`;
        if (!capturedWorks.has(key)) capturedWorks.set(key, work);
      }
    }

    // Without any API payload, continueFlag is still its initial `false`, which
    // used to make the list look complete whenever a total was known — even after
    // a login redirect with zero works, or for DOM-scraped rows. Only API-captured
    // lists may be reported as complete.
    const complete = capturedFromApi
      && (total > 0 ? capturedWorks.size >= total || !continueFlag : true);

    return {
      works: Array.from(capturedWorks.values()),
      total: Math.max(total, capturedWorks.size),
      complete,
    };
  }
  /**
   * Parse a JSON string and return it only if it decodes to a plain (non-array) object.
   *
   * @param value JSON text; `null`, `undefined` and the empty string yield `null`.
   * @returns The decoded object, or `null` for invalid JSON, arrays and primitives.
   */
  private parseJsonObject(value: string | null | undefined): Record<string, unknown> | null {
    if (value === null || value === undefined || value === '') {
      return null;
    }

    let decoded: unknown;
    try {
      decoded = JSON.parse(value);
    } catch {
      return null;
    }

    const isPlainRecord = typeof decoded === 'object' && decoded !== null && !Array.isArray(decoded);
    return isPlainRecord ? (decoded as Record<string, unknown>) : null;
  }
  /**
   * Request the follow-up pages of the `post_list` API, re-using the captured
   * request as a template.
   *
   * Each request copies the original body and overrides `currentPage`,
   * `rawKeyBuff` (the cursor from the previous page) and `timestamp`. Every
   * payload is handed to `options.collectPosts`. Paging stops when the payload
   * reports no continuation, the cursor is missing or unchanged, a page contains
   * no posts, a request fails, or the page limit (at most 30) is reached.
   *
   * @param options.page Page whose request context sends the POSTs.
   * @param options.request URL and JSON body of the captured first request.
   * @param options.firstLastBuff Cursor returned with the first page.
   * @param options.declaredTotal Total known so far (0 if unknown).
   * @param options.existingCount Number of works already collected.
   * @param options.collectPosts Callback receiving each payload and a source label.
   * @returns The latest known total and whether the API still reports more data.
   */
  private async fetchRemainingWeixinVideoWorkPages(options: {
    page: Page;
    request: { url: string; body: Record<string, unknown> };
    firstLastBuff: string;
    declaredTotal: number;
    existingCount: number;
    collectPosts: (payload: unknown, source: string) => void;
  }): Promise<{ total: number; continueFlag: boolean }> {
    const { url: endpoint, body: templateBody } = options.request;

    let cursor = options.firstLastBuff;
    let total = options.declaredTotal;
    let continueFlag = true;

    // Page size and starting page come from the captured request when usable.
    const requestedSize = Number(templateBody.pageSize);
    const pageSize = Number.isFinite(requestedSize) && requestedSize > 0 ? requestedSize : 5;
    const requestedPage = Number(templateBody.currentPage);
    let currentPage = Number.isFinite(requestedPage) && requestedPage > 0 ? requestedPage + 1 : 2;

    // Upper bound: pages implied by the total (20 when unknown), capped at 30.
    const pagesImpliedByTotal = total > 0 ? Math.ceil(total / pageSize) : 20;
    const lastAllowedPage = Math.min(Math.max(pagesImpliedByTotal, currentPage), 30);

    for (; continueFlag && cursor && currentPage <= lastAllowedPage; currentPage += 1) {
      const body = {
        ...templateBody,
        currentPage,
        rawKeyBuff: cursor,
        timestamp: String(Date.now()),
      };

      try {
        const response = await options.page.request.post(endpoint, {
          data: body,
          headers: {
            accept: 'application/json, text/plain, */*',
            'content-type': 'application/json;charset=UTF-8',
            referer: 'https://channels.weixin.qq.com/platform/content/post/list',
          },
          timeout: 30_000,
        });
        const payload = await response.json();
        options.collectPosts(payload, `${endpoint}#page=${currentPage}`);

        const reportedTotal = this.extractWeixinVideoTotal(payload);
        if (reportedTotal > 0) {
          total = reportedTotal;
        }

        const { lastBuff: nextCursor, continueFlag: hasMore } = this.extractWeixinVideoPagination(payload);
        continueFlag = hasMore;

        // A missing or repeated cursor would only re-request the same page.
        if (!nextCursor || nextCursor === cursor) {
          break;
        }
        cursor = nextCursor;

        // An empty page means there is nothing further to collect.
        if (this.extractWeixinVideoPostList(payload).length === 0) {
          break;
        }
      } catch (error) {
        logger.warn(`[WeixinVideo Works] Failed to fetch post_list page ${currentPage}:`, error);
        break;
      }
    }

    return { total, continueFlag };
  }
  /**
   * Locate the post array inside a `post_list` payload.
   *
   * Several known field names are probed, first under `payload.data`, then on the
   * payload itself. The first field that holds an array wins (even an empty one);
   * its non-object entries are dropped.
   *
   * @param payload Decoded API payload of unknown shape.
   * @returns The object entries of the first array found, or `[]` if there is none.
   */
  private extractWeixinVideoPostList(payload: unknown): Record<string, unknown>[] {
    const root = payload as any;
    const nested = root?.data;

    const nestedKeys = ['list', 'posts', 'postList', 'post_list', 'objectList', 'object_list', 'items'];
    const rootKeys = ['list', 'posts', 'objectList', 'object_list'];
    const candidates: unknown[] = [
      ...nestedKeys.map((key) => nested?.[key]),
      ...rootKeys.map((key) => root?.[key]),
    ];

    const firstArray = candidates.find((candidate): candidate is unknown[] => Array.isArray(candidate));
    if (!firstArray) {
      return [];
    }
    return firstArray.filter(
      (item): item is Record<string, unknown> => !!item && typeof item === 'object'
    );
  }
  /**
   * Read the total number of works from a `post_list` payload.
   *
   * Known field names are probed under `payload.data` first, then on the payload
   * itself; the first value that converts to a finite number greater than zero wins.
   *
   * @param payload Decoded API payload of unknown shape.
   * @returns The first positive total found, or `0` if there is none.
   */
  private extractWeixinVideoTotal(payload: unknown): number {
    const root = payload as any;
    const nested = root?.data;

    const rawValues: unknown[] = [
      nested?.total,
      nested?.totalCount,
      nested?.total_count,
      nested?.count,
      root?.total,
      root?.totalCount,
      root?.total_count,
    ];

    const hit = rawValues.find((raw) => {
      const numeric = Number(raw);
      return Number.isFinite(numeric) && numeric > 0;
    });
    return hit === undefined ? 0 : Number(hit);
  }
  /**
   * Read the paging cursor and the "more data" flag from a `post_list` payload.
   *
   * The cursor is the first non-empty string among `lastBuff` / `last_buff`, looked
   * up under `payload.data` and then on the payload itself. The flag is the first
   * non-nullish value among the known "continue" / "has more" fields; it counts as
   * set only for `true`, `1`, `'1'` or `'true'`.
   *
   * @param payload Decoded API payload of unknown shape.
   * @returns `lastBuff` (`''` when absent) and `continueFlag`.
   */
  private extractWeixinVideoPagination(payload: unknown): { lastBuff: string; continueFlag: boolean } {
    const root = payload as any;
    const nested = root?.data;

    let lastBuff = '';
    for (const candidate of [nested?.lastBuff, nested?.last_buff, root?.lastBuff, root?.last_buff]) {
      if (typeof candidate === 'string' && candidate.length > 0) {
        lastBuff = candidate;
        break;
      }
    }

    const flagCandidates: unknown[] = [
      nested?.continueFlag,
      nested?.continue_flag,
      nested?.hasMore,
      nested?.has_more,
      root?.continueFlag,
      root?.hasMore,
    ];
    const rawContinue = flagCandidates.find((value) => value !== null && value !== undefined);
    const truthyMarkers: unknown[] = [true, 1, '1', 'true'];

    return {
      lastBuff,
      continueFlag: truthyMarkers.includes(rawContinue),
    };
  }
  /**
   * Convert one raw `post_list` entry into a `WorkItem`.
   *
   * Field names vary between payloads, so every value is looked up through a list
   * of candidate paths and the first non-empty hit is used.
   *
   * @param post Raw post object from the API payload.
   * @returns The mapped work, or `null` when the post has neither an ID nor a title.
   */
  private mapWeixinVideoPostToWorkItem(post: Record<string, unknown>): WorkItem | null {
    // Reduce an arbitrary value to text: strings are trimmed, finite numbers are
    // stringified, arrays and objects are searched (through a fixed list of text
    // keys) for the first non-empty text, giving up beyond depth 4.
    const readTextValue = (value: unknown, depth = 0): string => {
      if (depth > 4 || value === undefined || value === null) return '';
      if (typeof value === 'string') return value.trim();
      if (typeof value === 'number' && Number.isFinite(value)) return String(value);
      if (Array.isArray(value)) {
        for (const item of value) {
          const text = readTextValue(item, depth + 1);
          if (text) return text;
        }
        return '';
      }
      if (typeof value !== 'object') return '';

      const record = value as Record<string, unknown>;
      const textKeys = ['description', 'desc', 'title', 'content', 'text', 'wording', 'wordingText', 'name'];
      for (const key of textKeys) {
        const text = readTextValue(record[key], depth + 1);
        if (text) return text;
      }
      return '';
    };

    // First non-empty text found at any of the given key paths inside `post`.
    const readString = (...paths: string[][]): string => {
      for (const path of paths) {
        let value: unknown = post;
        for (const key of path) {
          value = value && typeof value === 'object' ? (value as Record<string, unknown>)[key] : undefined;
        }
        const text = readTextValue(value);
        if (text) return text;
      }
      return '';
    };

    // First path that yields a finite number (commas/whitespace removed, a `万`
    // unit multiplies by 10000), floored; 0 when no path qualifies.
    const readNumber = (...paths: string[][]): number => {
      for (const path of paths) {
        const raw = readString(path);
        if (!raw) continue;
        const normalized = raw.replace(/[,\s]/g, '');
        const value = Number(normalized.includes('万') ? parseFloat(normalized) * 10000 : normalized);
        if (Number.isFinite(value)) return Math.floor(value);
      }
      return 0;
    };

    const videoId = readString(
      ['objectId'],
      ['object_id'],
      ['exportId'],
      ['export_id'],
      ['feedId'],
      ['feed_id'],
      ['postId'],
      ['post_id'],
      ['id'],
      ['objectNonce'],
      ['object_nonce']
    );

    const rawTitle = readString(
      ['title'],
      ['desc', 'description'],
      ['desc', 'desc'],
      ['desc', 'title'],
      ['desc', 'content'],
      ['desc', 'text'],
      ['description'],
      ['objectDesc', 'description'],
      ['object_desc', 'description'],
      ['wording']
    );

    // Check the raw title: the placeholder used to be applied before this test,
    // which made the `null` branch unreachable and turned unidentifiable posts
    // into placeholder works.
    if (!videoId && !rawTitle) return null;
    const title = rawTitle || '无标题';

    const publishTimeRaw = readString(
      ['createTime'],
      ['create_time'],
      ['createtime'],
      ['publishTime'],
      ['publish_time'],
      ['objectDesc', 'createTime'],
      ['object_desc', 'createTime']
    );
    const publishTime = this.normalizePlatformPublishTime(publishTimeRaw);
    const coverUrl = extractPlatformWorkCoverUrl(post);
    const videoUrl = videoId ? `https://channels.weixin.qq.com/platform/post/${videoId}` : '';

    return {
      videoId,
      title,
      coverUrl,
      videoUrl,
      duration: '00:00',
      publishTime,
      status: 'published',
      playCount: readNumber(['playCount'], ['play_count'], ['readCount'], ['read_count'], ['statistics', 'playCount']),
      likeCount: readNumber(['likeCount'], ['like_count'], ['favCount'], ['fav_count'], ['statistics', 'likeCount']),
      commentCount: readNumber(['commentCount'], ['comment_count'], ['statistics', 'commentCount']),
      shareCount: readNumber(['shareCount'], ['share_count'], ['statistics', 'shareCount']),
    };
  }
  /**
   * Normalizes a platform-supplied publish time into an ISO-8601 string.
   *
   * Numeric input is treated as a Unix timestamp: values up to 10_000_000_000 are
   * read as seconds, larger values as milliseconds. Any other input is handed to
   * the `Date` constructor. Whenever the input is empty, unparseable, or outside the
   * range a `Date` can represent, the current time is returned instead.
   *
   * @param raw Raw timestamp or date string.
   * @returns An ISO-8601 timestamp. This method never throws.
   */
  private normalizePlatformPublishTime(raw: string): string {
    const now = (): string => new Date().toISOString();
    if (!raw) return now();

    const numeric = Number(raw);
    if (Number.isFinite(numeric) && numeric > 0) {
      const timestamp = numeric > 10_000_000_000 ? numeric : numeric * 1000;
      const fromTimestamp = new Date(timestamp);
      // toISOString() throws RangeError on an Invalid Date, which is what an
      // oversized value (e.g. a nanosecond timestamp) produces. Fall back instead.
      return Number.isNaN(fromTimestamp.getTime()) ? now() : fromTimestamp.toISOString();
    }

    const parsed = new Date(raw);
    return Number.isNaN(parsed.getTime()) ? now() : parsed.toISOString();
  }
  /**
   * DOM fallback that scrapes work cards from the page that is currently loaded.
   *
   * Inside the browser context, the first 80 elements whose class name contains
   * "post", "video", "work" or "feed" are inspected. An element with at least two
   * characters of visible text yields a row made of:
   * - a title: the text of the first descendant whose class contains "title",
   *   "desc" or "name", otherwise the first text line that is 2-79 characters long;
   * - a cover: the first `<img>` whose `src` attribute starts with `http` or `//`.
   *
   * The DOM exposes no counters or publish time here, so counters are 0 and the
   * publish time is the moment of conversion.
   *
   * @param page Page to scrape (presumably already on the works list - the caller is not visible here).
   * @returns Works carrying synthetic `weixin_video_dom_*` ids, or an empty list when scraping throws.
   */
  private async extractWeixinVideoWorksFromDom(page: Page): Promise<WorkItem[]> {
    try {
      const scrapedCards = await page.evaluate(() => {
        const collected: Array<{
          title: string;
          coverUrl: string;
          publishTime: string;
          playCount: number;
          likeCount: number;
          commentCount: number;
        }> = [];

        const candidates = Array.from(
          document.querySelectorAll<HTMLElement>(
            '[class*="post"], [class*="video"], [class*="work"], [class*="feed"]'
          )
        ).slice(0, 80);

        candidates.forEach((element) => {
          const visibleText = element.innerText?.trim() || '';
          // Covers both the empty string and single-character noise.
          if (visibleText.length < 2) return;

          const labelled = element
            .querySelector<HTMLElement>('[class*="title"], [class*="desc"], [class*="name"]')
            ?.innerText?.trim();
          let heading = labelled || '';
          if (!heading) {
            const firstUsableLine = visibleText
              .split('\n')
              .map((line) => line.trim())
              .find((line) => line.length > 1 && line.length < 80);
            heading = firstUsableLine || '';
          }

          const cover = element.querySelector<HTMLImageElement>('img[src^="http"], img[src^="//"]');
          if (!heading && !cover?.src) return;

          collected.push({
            title: heading || '无标题',
            coverUrl: cover?.src || '',
            publishTime: '',
            playCount: 0,
            likeCount: 0,
            commentCount: 0,
          });
        });

        return collected;
      });

      const works: WorkItem[] = [];
      for (const card of scrapedCards) {
        if (!card.title && !card.coverUrl) continue;
        works.push({
          // The running length doubles as the index among accepted rows.
          videoId: `weixin_video_dom_${Date.now()}_${works.length}`,
          title: card.title || '无标题',
          coverUrl: extractPlatformWorkCoverUrl(card),
          videoUrl: '',
          duration: '00:00',
          publishTime: card.publishTime || new Date().toISOString(),
          status: 'published',
          playCount: card.playCount || 0,
          likeCount: card.likeCount || 0,
          commentCount: card.commentCount || 0,
          shareCount: 0,
        });
      }
      return works;
    } catch (error) {
      logger.warn('[WeixinVideo Works] DOM fallback failed:', error);
      return [];
    }
  }
  /**
   * Fetches Xiaohongshu account info and the works list by observing creator-center API traffic.
   *
   * Steps:
   * 1. Seed a provisional account id from the `customer_id` / `user_id` / `web_session` cookie.
   * 2. Open the creator home page while listening for profile and creator-home API responses.
   * 3. If no nickname was captured, scrape name, avatar, fans count and note count from the DOM.
   * 4. If the fans count is still unknown, query the fans-data page.
   * 5. Open the note manager and page through the "posted notes" API to build the works list.
   *
   * Both response listeners are detached before the method returns, on every path.
   *
   * @param page Playwright page used for all navigation.
   * @param _context Unused.
   * @param cookies Account cookies; only used to derive the provisional account id.
   * @returns Account info. `worksList` / `worksListComplete` are present only when the whole
   *   flow ran to the end; on a login redirect or an unexpected error only the basic fields
   *   gathered so far are returned.
   */
  private async fetchXiaohongshuAccountInfo(
    page: Page,
    _context: BrowserContext,
    cookies: CookieData[]
  ): Promise<AccountInfo> {
    // Timestamp-based placeholder. It is remembered so that we can later tell reliably
    // whether a real id has replaced it (re-deriving it from the clock is not stable).
    const placeholderAccountId = `xiaohongshu_${Date.now()}`;
    let accountId = placeholderAccountId;
    let accountName = '小红书账号';
    let avatarUrl = '';
    let fansCount: number | undefined;
    let worksCount = 0;

    // Data captured from API responses.
    const capturedData: {
      userInfo?: {
        nickname?: string;
        avatar?: string;
        userId?: string;
        redId?: string;
        fans?: number;
        notes?: number;
      };
    } = {};

    // Declared outside the try block so the finally clause can always detach it.
    const xhsAccountResponseHandler = async (response: import('playwright').Response) => {
      const url = response.url();
      try {
        // User info APIs.
        if (
          url.includes('/api/galaxy/creator/home/personal_info') ||
          url.includes('/api/sns/web/v1/user/selfinfo') ||
          url.includes('/user/selfinfo')
        ) {
          const data = await response.json();
          logger.info(`[Xiaohongshu API] User info response:`, safeStringify(data));
          const profile = extractXiaohongshuProfileInfo(data);
          if (profile.name || profile.redNum || profile.userId || profile.fansCount !== undefined) {
            capturedData.userInfo = {
              nickname: profile.name,
              avatar: profile.avatar,
              userId: profile.userId,
              redId: profile.redNum,
              fans: profile.fansCount,
              notes: profile.worksCount,
            };
            logger.info(`[Xiaohongshu API] Captured user info:`, capturedData.userInfo);
          }
        }

        // Creator home data: fills gaps, and overrides the fans / notes counters.
        if (
          url.includes('/api/galaxy/creator/home/home_page') ||
          url.includes('/api/galaxy/creator/data')
        ) {
          const data = await response.json();
          logger.info(`[Xiaohongshu API] Creator home response:`, safeStringify(data));
          if (data?.data) {
            const homeData = extractXiaohongshuProfileInfo(data);
            if (homeData.fansCount !== undefined) {
              capturedData.userInfo = capturedData.userInfo || {};
              capturedData.userInfo.fans = homeData.fansCount;
            }
            if (homeData.worksCount !== undefined) {
              capturedData.userInfo = capturedData.userInfo || {};
              capturedData.userInfo.notes = homeData.worksCount;
            }
            if (homeData.name) {
              capturedData.userInfo = capturedData.userInfo || {};
              capturedData.userInfo.nickname = capturedData.userInfo.nickname || homeData.name;
            }
            if (homeData.avatar) {
              capturedData.userInfo = capturedData.userInfo || {};
              capturedData.userInfo.avatar = capturedData.userInfo.avatar || homeData.avatar;
            }
            if (homeData.redNum || homeData.userId) {
              capturedData.userInfo = capturedData.userInfo || {};
              capturedData.userInfo.redId = capturedData.userInfo.redId || homeData.redNum;
              capturedData.userInfo.userId = capturedData.userInfo.userId || homeData.userId;
            }
          }
        }
      } catch {
        // Ignore non-JSON responses.
      }
    };

    try {
      // Derive a provisional account id from cookies.
      const userIdCookie = cookies.find(
        (c) => c.name === 'customer_id' || c.name === 'user_id' || c.name === 'web_session'
      );
      if (userIdCookie?.value) {
        accountId = `xiaohongshu_${userIdCookie.value.slice(0, 20)}`;
      }

      // The listener must be attached before navigating so no response is missed.
      page.on('response', xhsAccountResponseHandler);

      logger.info('[Xiaohongshu] Navigating to creator center...');
      await page.goto('https://creator.xiaohongshu.com/creator/home', {
        waitUntil: 'domcontentloaded',
        timeout: 30000,
      });

      // Give the page time to load.
      await page.waitForTimeout(3000);

      // A redirect to a login page means the cookies are no longer valid.
      const currentUrl = page.url();
      if (currentUrl.includes('login') || currentUrl.includes('passport')) {
        logger.warn('[Xiaohongshu] Cookie expired, needs login');
        // fansCount stays undefined (not 0) so callers do not reset a stored value.
        return { accountId, accountName, avatarUrl, fansCount: undefined, worksCount: 0 };
      }

      // Give the API responses time to arrive.
      await page.waitForTimeout(3000);

      // Nothing captured from the API: fall back to scraping the page.
      if (!capturedData.userInfo?.nickname) {
        logger.info('[Xiaohongshu] API did not return data, extracting from page...');

        // User name.
        const nameSelectors = [
          '[class*="nickname"]',
          '[class*="user-name"]',
          '[class*="userName"]',
          '.user-info .name',
          '[class*="creator"] [class*="name"]',
        ];
        for (const selector of nameSelectors) {
          const el = await page.$(selector);
          if (el) {
            const text = await el.textContent();
            if (text?.trim() && text.trim().length < 50) {
              accountName = text.trim();
              logger.info(`[Xiaohongshu] Found name from page: ${accountName}`);
              break;
            }
          }
        }

        // Avatar.
        const avatarSelectors = [
          '[class*="avatar"] img',
          '[class*="user-avatar"] img',
          '.user-info img',
          '[class*="creator"] img[src*="sns"]',
        ];
        for (const selector of avatarSelectors) {
          const el = await page.$(selector);
          if (el) {
            const src = await el.getAttribute('src');
            if (src && src.startsWith('http')) {
              avatarUrl = src;
              logger.info(`[Xiaohongshu] Found avatar from page: ${avatarUrl.slice(0, 50)}...`);
              break;
            }
          }
        }

        // Fans and note counters. The label may be followed by an ASCII or a full-width colon.
        const statsText = await page.textContent('body');
        const fansMatch = statsText?.match(/粉丝[::\s]*(\d+(?:\.\d+)?[万亿]?)/);
        if (fansMatch) {
          fansCount = this.parseChineseNumber(fansMatch[1]);
          logger.info(`[Xiaohongshu] Found fans count: ${fansCount}`);
        }
        const notesMatch = statsText?.match(/笔记[::\s]*(\d+)/);
        if (notesMatch) {
          worksCount = parseInt(notesMatch[1], 10);
          logger.info(`[Xiaohongshu] Found notes count: ${worksCount}`);
        }
      }

      // Values captured from the API take precedence over scraped ones.
      if (capturedData.userInfo) {
        if (capturedData.userInfo.nickname) {
          accountName = capturedData.userInfo.nickname;
        }
        if (capturedData.userInfo.avatar) {
          avatarUrl = capturedData.userInfo.avatar;
        }
        // Prefer the Xiaohongshu number (redId) as the account id.
        if (capturedData.userInfo.redId) {
          accountId = `xiaohongshu_${capturedData.userInfo.redId}`;
          logger.info(`[Xiaohongshu] Using redId as accountId: ${accountId}`);
        } else if (capturedData.userInfo.userId) {
          accountId = `xiaohongshu_${capturedData.userInfo.userId}`;
        }
        if (capturedData.userInfo.fans !== undefined) {
          fansCount = capturedData.userInfo.fans;
        }
        if (capturedData.userInfo.notes !== undefined) {
          worksCount = capturedData.userInfo.notes;
        }
      }

      if (fansCount === undefined) {
        const dataPageFansCount = await this.fetchXiaohongshuFansCountFromDataPage(page);
        if (dataPageFansCount !== undefined) {
          fansCount = dataPageFansCount;
          logger.info(`[Xiaohongshu] Found fans count from fans data page: ${fansCount}`);
        }
      }

      // Still on the placeholder (or an unusable id): look for the Xiaohongshu number in
      // the page text. Comparing against the remembered placeholder, rather than against a
      // prefix of the current clock, keeps this check correct however long the steps above took.
      const hasUsableAccountId = /xiaohongshu_[a-zA-Z0-9_]+/.test(accountId);
      if (!hasUsableAccountId || accountId === placeholderAccountId) {
        const bodyText = await page.textContent('body');
        // Expected shape: "小红书号:xxxxxxx" (ASCII or full-width colon).
        const xhsIdMatch =
          bodyText?.match(/小红书号[::]\s*([a-zA-Z0-9_]+)/) ||
          bodyText?.match(/红书号[::]\s*([a-zA-Z0-9_]+)/);
        if (xhsIdMatch) {
          accountId = `xiaohongshu_${xhsIdMatch[1]}`;
          logger.info(`[Xiaohongshu] Found 小红书号 from page text: ${accountId}`);
        }
      }

      logger.info(`[Xiaohongshu] Account info: id=${accountId}, name=${accountName}, fans=${fansCount}, works=${worksCount}`);

      // Works list, collected from the "posted notes" API.
      const worksList: WorkItem[] = [];
      let worksListComplete: boolean | undefined;
      try {
        logger.info('[Xiaohongshu] Navigating to note manager page to fetch works...');

        // Every note captured so far, in arrival order.
        const allNotesData: Array<{
          noteId: string;
          title: string;
          coverUrl: string;
          status: number;
          publishTime: string;
          type: string;
          duration: number;
          likeCount: number;
          commentCount: number;
          collectCount: number;
          viewCount: number;
          shareCount: number;
        }> = [];

        // Bug #6071: page cap raised from 120 to 200 pages (4000 notes at 20 per page).
        let maxPages = 200;
        let apiResponseReceived = false;
        let totalNotesCount = 0; // Total declared by the API, when available.
        let stoppedByMaxPages = false;
        let paginationAborted = false; // A page fetch threw or was rejected by the API.
        const seenNoteIds = new Set<string>();

        // Merges one API payload into allNotesData, skipping notes that were already seen.
        const upsertNotesFromPayload = (payload: any) => {
          if (!payload) return;
          const declaredTotal = extractDeclaredNotesCountFromPostedResponse(payload);
          if (declaredTotal > 0) {
            totalNotesCount = Math.max(totalNotesCount, declaredTotal);
          }
          if (totalNotesCount > 0) {
            // Let the page cap grow with the declared total, up to 500 pages.
            const estimatedPages = Math.ceil(totalNotesCount / 20) + 5;
            maxPages = Math.max(maxPages, Math.min(500, estimatedPages));
          }
          const notes = Array.isArray(payload.notes) ? payload.notes : [];
          for (const note of notes) {
            const noteId = note.id || '';
            if (!noteId || seenNoteIds.has(noteId)) continue;
            seenNoteIds.add(noteId);
            let coverUrl = note.images_list?.[0]?.url || '';
            if (coverUrl.startsWith('http://')) {
              coverUrl = coverUrl.replace('http://', 'https://');
            }
            const duration = note.video_info?.duration || 0;
            allNotesData.push({
              noteId,
              title: note.display_title || '',
              coverUrl,
              // NOTE(review): `|| 1` turns a tab_status of 0 into 1, so the 'draft'
              // mapping further down can never fire - confirm whether that is intended.
              status: note.tab_status || 1,
              publishTime: note.time || '',
              type: note.type || 'normal',
              duration,
              likeCount: note.likes || 0,
              commentCount: note.comments_count || 0,
              collectCount: note.collected_count || 0,
              viewCount: note.view_count || 0,
              shareCount: note.shared_count || 0,
            });
          }
        };

        // Calls the posted-notes API from inside the page so the session cookies are sent.
        const fetchNotesPage = async (pageNum: number) => {
          return await page.evaluate(async (p) => {
            // #6071: request page_size=20 explicitly (the default may be only 10).
            const response = await fetch(
              `https://edith.xiaohongshu.com/web_api/sns/v5/creator/note/user/posted?tab=0&page=${p}&page_size=20`,
              {
                method: 'GET',
                credentials: 'include',
                headers: {
                  Accept: 'application/json',
                },
              }
            );
            return await response.json();
          }, pageNum);
        };

        const notesApiHandler = async (response: import('playwright').Response) => {
          const url = response.url();
          try {
            // Notes list API, e.g.
            // https://edith.xiaohongshu.com/web_api/sns/v5/creator/note/user/posted?tab=0&page=X
            if (
              url.includes('/web_api/sns/v5/creator/note/user/posted') ||
              url.includes('/api/sns/v5/creator/note/user/posted') ||
              url.includes('creator/note/user/posted')
            ) {
              const data = await response.json();
              logger.info(`[Xiaohongshu API] Notes list response: success=${data?.success}, code=${data?.code}, notes count=${data?.data?.notes?.length || 0}`);
              if ((data?.success || data?.code === 0) && data?.data) {
                apiResponseReceived = true;
                upsertNotesFromPayload(data.data);
              }
            }
          } catch (e) {
            // Only warn for URLs that are actually note-related.
            if (url.includes('creator/note')) {
              logger.warn('[Xiaohongshu API] Failed to parse notes response:', e);
            }
          }
        };

        // Attach before navigating; the finally below detaches it even when a step throws.
        page.on('response', notesApiHandler);
        try {
          logger.info('[Xiaohongshu] API listener registered, navigating to note manager...');

          // domcontentloaded keeps the navigation fast and avoids networkidle timeouts.
          try {
            await page.goto('https://creator.xiaohongshu.com/new/note-manager', {
              waitUntil: 'domcontentloaded',
              timeout: 30000,
            });
          } catch {
            // A navigation timeout does not invalidate API data captured so far.
            logger.warn('[Xiaohongshu] Navigation timeout, but API data may have been captured');
          }

          // Give the API responses time to arrive.
          await page.waitForTimeout(5000);
          logger.info(`[Xiaohongshu] After initial wait: apiResponseReceived=${apiResponseReceived}, notesCount=${allNotesData.length}`);

          // The listener saw nothing: call the API directly.
          if (allNotesData.length === 0) {
            logger.info('[Xiaohongshu] No notes captured via listener, trying direct API call...');
            try {
              const apiResponse = await fetchNotesPage(0);
              logger.info(`[Xiaohongshu] Direct API call result: success=${apiResponse?.success}, code=${apiResponse?.code}`);
              if ((apiResponse?.success || apiResponse?.code === 0) && apiResponse?.data) {
                upsertNotesFromPayload(apiResponse.data);
              }
            } catch (apiError) {
              logger.warn('[Xiaohongshu] Direct API call failed:', apiError);
            }
          }

          // Page through the remaining notes until the list is exhausted.
          if (allNotesData.length > 0) {
            let pageNum = 1;
            while (pageNum < maxPages) {
              if (totalNotesCount > 0 && seenNoteIds.size >= totalNotesCount) break;
              let nextResponse: any;
              try {
                nextResponse = await fetchNotesPage(pageNum);
              } catch (e) {
                logger.warn(`[Xiaohongshu] Page fetch failed: page=${pageNum}`, e);
                paginationAborted = true;
                break;
              }
              if (!(nextResponse?.success || nextResponse?.code === 0) || !nextResponse?.data) {
                paginationAborted = true;
                break;
              }
              const before = seenNoteIds.size;
              upsertNotesFromPayload(nextResponse.data);
              // A page that adds nothing new marks the end of the list.
              if (seenNoteIds.size === before) break;
              pageNum++;
              await page.waitForTimeout(600);
            }
            if (pageNum >= maxPages) stoppedByMaxPages = true;
          }
        } finally {
          page.off('response', notesApiHandler);
        }

        logger.info(`[Xiaohongshu] Total notes captured: ${allNotesData.length}`);

        // Convert to WorkItem.
        for (const note of allNotesData) {
          // Duration as mm:ss; empty when the note has no duration.
          const minutes = Math.floor(note.duration / 60);
          const seconds = note.duration % 60;
          const durationStr = note.duration > 0
            ? `${minutes.toString().padStart(2, '0')}:${seconds.toString().padStart(2, '0')}`
            : '';

          let statusStr = 'published';
          if (note.status === 0) statusStr = 'draft';
          else if (note.status === 2) statusStr = 'reviewing';
          else if (note.status === 3) statusStr = 'rejected';

          worksList.push({
            videoId: note.noteId,
            title: note.title || '无标题',
            coverUrl: note.coverUrl,
            videoUrl: note.noteId ? `https://www.xiaohongshu.com/explore/${note.noteId}` : '',
            duration: durationStr,
            publishTime: note.publishTime,
            status: statusStr,
            playCount: note.viewCount,
            likeCount: note.likeCount,
            commentCount: note.commentCount,
            shareCount: note.shareCount,
          });
        }

        logger.info(`[Xiaohongshu] Fetched ${worksList.length} works via API`);
        if (totalNotesCount > 0) {
          worksListComplete = worksList.length >= totalNotesCount;
          worksCount = totalNotesCount;
        } else if (worksList.length > 0) {
          // Without a declared total, the list is complete only if paging ended
          // naturally - not at the page cap and not because a page fetch failed.
          worksListComplete = !stoppedByMaxPages && !paginationAborted;
          worksCount = worksList.length;
        }
      } catch (worksError) {
        logger.warn('[Xiaohongshu] Failed to fetch works list:', worksError);
      }

      logger.info(`[Xiaohongshu] Final account info: id=${accountId}, name=${accountName}, fans=${fansCount}, works=${worksCount}`);
      return { accountId, accountName, avatarUrl, fansCount, worksCount, worksList, worksListComplete };
    } catch (error) {
      logger.warn('[Xiaohongshu] Failed to fetch account info:', error);
    } finally {
      // Detaching a listener that was never attached is a no-op, so this is safe on every path.
      page.off('response', xhsAccountResponseHandler);
    }
    return { accountId, accountName, avatarUrl, fansCount, worksCount };
  }
  /**
   * Parses a count that may carry a Chinese magnitude suffix (e.g. `1.2万` -> 12000).
   *
   * The leading numeric part is read with `parseFloat`. A `万` anywhere in the input
   * scales it by 10^4; otherwise a `亿` scales it by 10^8. Any remaining fraction is
   * dropped with `Math.floor`.
   *
   * @param str Text such as `1234`, `1.2万` or `3亿`.
   * @returns The integer value, or 0 when the input is empty or does not start with a finite number.
   */
  private parseChineseNumber(str: string): number {
    if (!str) return 0;

    const base = parseFloat(str);
    // No leading number: report 0 rather than leaking NaN into counters.
    if (!Number.isFinite(base)) return 0;

    let multiplier = 1;
    if (str.includes('万')) {
      multiplier = 10000;
    } else if (str.includes('亿')) {
      multiplier = 100000000;
    }

    const scaled = base * multiplier;
    // Binary floating point can land a hair below the intended integer
    // (0.57 * 10000 === 5699.999999999999), which a bare Math.floor would turn
    // into an off-by-one. Snap to the nearest integer when the gap is within
    // rounding error, and only floor genuinely fractional values.
    const nearest = Math.round(scaled);
    const tolerance = Math.abs(scaled) * Number.EPSILON * 4;
    return Math.abs(scaled - nearest) <= tolerance ? nearest : Math.floor(scaled);
  }
  /**
   * Reads the fans count from the creator-center fans statistics page.
   *
   * A wait for the `fans/overall_new` GET response (20 s) is armed before navigating
   * so the response fired during page load is not missed. After navigation the
   * "近30天" control is clicked (failures ignored). If the first wait produced
   * nothing, a second 10 s wait is attempted. The response body is then handed to
   * `extractLatestXiaohongshuFansCount`.
   *
   * @param page Page to navigate; it is left on the fans-data page (or wherever it was redirected).
   * @returns The extracted count, or `undefined` when the page redirects to login,
   *   no matching response arrives, the count cannot be extracted, or an error is thrown.
   */
  private async fetchXiaohongshuFansCountFromDataPage(page: Page): Promise<number | undefined> {
    const fansDataUrl = 'https://creator.xiaohongshu.com/statistics/fans-data';
    const overallNewPattern = /\/api\/galaxy\/creator\/data\/fans\/overall_new/i;

    try {
      // Resolves to null instead of rejecting when the timeout elapses.
      const awaitOverallNew = (timeout: number) =>
        page
          .waitForResponse(
            (res) => res.request().method() === 'GET' && overallNewPattern.test(res.url()),
            { timeout }
          )
          .catch(() => null);

      // Must be armed before navigation starts.
      const earlyCapture = awaitOverallNew(20000);

      logger.info('[Xiaohongshu] Navigating to fans data page to fetch fans count...');
      await page
        .goto(fansDataUrl, { waitUntil: 'domcontentloaded', timeout: 30000 })
        .catch((error) => {
          logger.warn('[Xiaohongshu] Fans data page navigation failed:', error);
        });
      await page.waitForTimeout(2000);

      const landedUrl = page.url();
      const redirectedToLogin = landedUrl.includes('login') || landedUrl.includes('passport');
      if (redirectedToLogin) {
        logger.warn('[Xiaohongshu] Fans data page redirected to login');
        return undefined;
      }

      // Exact selector first, with a text-based locator as the alternative.
      const near30ButtonSelector =
        '#content-area > main > div:nth-child(3) > div > div.content > div.css-12s9z8c.fans-data-container > div.title-container > div.extra-box > div > label:nth-child(2)';
      const bySelector = page.locator(near30ButtonSelector);
      const byText = page.locator('.fans-data-container').getByText('近30天').first();
      await bySelector
        .or(byText)
        .click({ timeout: 5000 })
        .catch(() => undefined);

      const captured = (await earlyCapture) || (await awaitOverallNew(10000));
      if (!captured) {
        logger.warn('[Xiaohongshu] No fans overall_new response captured');
        return undefined;
      }

      const payload = await captured.json().catch(() => null);
      const latestCount = extractLatestXiaohongshuFansCount(payload);
      if (latestCount === undefined) {
        logger.warn('[Xiaohongshu] Fans overall_new response did not contain fans count');
      }
      return latestCount;
    } catch (error) {
      logger.warn('[Xiaohongshu] Failed to fetch fans count from fans data page:', error);
      return undefined;
    }
  }
  /**
   * Looks up the static configuration for a platform.
   *
   * Each entry holds the platform's home URL and a list of `loginIndicators`
   * substrings (presumably matched against the current URL to detect a login
   * redirect - the consumer is outside this block).
   *
   * @param platform Platform key.
   * @returns The matching entry, or `{ homeUrl: '', loginIndicators: ['login'] }` for an unknown platform.
   */
  private getPlatformConfig(platform: PlatformType) {
    const entry = (homeUrl: string, ...loginIndicators: string[]) => ({ homeUrl, loginIndicators });

    const known: Record<string, { homeUrl: string; loginIndicators: string[] }> = {
      douyin: entry('https://creator.douyin.com/creator-micro/home', 'login', 'passport', 'sso'),
      bilibili: entry('https://member.bilibili.com/platform/home', 'passport.bilibili.com', 'login'),
      kuaishou: entry('https://cp.kuaishou.com/profile', 'passport.kuaishou.com', 'login'),
      xiaohongshu: entry('https://creator.xiaohongshu.com/', 'login', 'passport'),
      weixin_video: entry('https://channels.weixin.qq.com/platform', 'login.html', '/login', 'passport'),
      baijiahao: entry('https://baijiahao.baidu.com/builder/rc/home', 'login', 'passport'),
      toutiao: entry('https://mp.toutiao.com/profile_v4/index', 'login', 'passport', 'sso'),
      weibo: entry('https://weibo.com/u/', 'login', 'passport'),
      dayu: entry('https://mp.dayu.com/', 'login', 'passport'),
    };

    const match = known[platform];
    if (match) return match;
    return entry('', 'login');
  }
  2504. /**
  2505. * 百家号 - 直接通过 API 获取账号信息和作品列表
  2506. */
  2507. private async fetchBaijiahaoAccountInfoDirectApi(cookies: CookieData[]): Promise<AccountInfo> {
  2508. logger.info(`[Baijiahao API] Fetching account info via direct API...`);
  2509. // 构建 Cookie 字符串,确保格式正确
  2510. const cookieString = cookies
  2511. .map(c => `${c.name.trim()}=${c.value.trim()}`)
  2512. .filter(c => c.includes('=')) // 过滤掉无效的 cookie
  2513. .join('; ');
  2514. logger.debug(`[Baijiahao API] Cookie string length: ${cookieString.length}, cookie count: ${cookies.length}`);
  2515. const headers: Record<string, string> = {
  2516. 'Accept': '*/*',
  2517. 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
  2518. 'Accept-Encoding': 'gzip, deflate, br',
  2519. 'Connection': 'keep-alive',
  2520. 'Cookie': cookieString,
  2521. // 'Referer': 'https://baijiahao.baidu.com/builder/rc/home',
  2522. // 'Origin': 'https://baijiahao.baidu.com',
  2523. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  2524. // 'Sec-Fetch-Dest': 'empty',
  2525. // 'Sec-Fetch-Mode': 'cors',
  2526. // 'Sec-Fetch-Site': 'same-origin',
  2527. };
  2528. let accountInfo: AccountInfo = this.getDefaultAccountInfo('baijiahao');
  2529. // 标记哪些字段已成功获取
  2530. let fansCountFetched = false;
  2531. let worksCountFetched = false;
  2532. try {
  2533. // 1. 获取账号基本信息 (appinfo API)
  2534. logger.info(`[Baijiahao API] Step 1: Fetching appinfo...`);
  2535. const appInfoResponse = await fetch('https://baijiahao.baidu.com/builder/app/appinfo', {
  2536. method: 'GET',
  2537. headers,
  2538. });
  2539. if (!appInfoResponse.ok) {
  2540. logger.error(`[Baijiahao API] appinfo request failed: ${appInfoResponse.status}`);
  2541. throw new Error(`appinfo request failed: ${appInfoResponse.status}`);
  2542. }
  2543. const appInfoData = await appInfoResponse.json() as {
  2544. errno?: number;
  2545. errmsg?: string;
  2546. data?: {
  2547. user?: {
  2548. name?: string;
  2549. avatar?: string;
  2550. app_id?: string | number;
  2551. userid?: number;
  2552. status?: string;
  2553. };
  2554. };
  2555. };
  2556. logger.info(`[Baijiahao API] appinfo response: errno=${appInfoData.errno}, errmsg=${appInfoData.errmsg}`);
  2557. if (appInfoData.errno !== 0) {
  2558. // #6085: errno 非 0 不一定是 cookie 失效(如 errno=10001402 分散认证),
  2559. // 只有 errno=110 才明确表示未登录,其他 errno 返回默认信息避免同步中断
  2560. if (appInfoData.errno === 110) {
  2561. logger.error(`[Baijiahao API] Not logged in (errno=110)`);
  2562. throw new Error(`appinfo API error: errno=110, cookie expired`);
  2563. }
  2564. logger.warn(`[Baijiahao API] appinfo returned errno=${appInfoData.errno}, errmsg=${appInfoData.errmsg}, returning default info`);
  2565. return accountInfo;
  2566. }
  2567. if (!appInfoData.data?.user) {
  2568. logger.error(`[Baijiahao API] No user data in appinfo response`);
  2569. throw new Error('No user data in appinfo response');
  2570. }
  2571. const user = appInfoData.data.user;
  2572. accountInfo.accountId = user.app_id ? `bjh_${user.app_id}` : accountInfo.accountId;
  2573. accountInfo.accountName = user.name || accountInfo.accountName;
  2574. // 处理头像 URL(可能是相对路径)
  2575. if (user.avatar) {
  2576. accountInfo.avatarUrl = user.avatar.startsWith('http')
  2577. ? user.avatar
  2578. : `https:${user.avatar}`;
  2579. }
  2580. logger.info(`[Baijiahao API] Got account info: name=${accountInfo.accountName}, id=${accountInfo.accountId}, avatar=${accountInfo.avatarUrl}`);
  2581. // 2. 获取粉丝数 (growthInfo API)
  2582. logger.info(`[Baijiahao API] Step 2: Fetching growth info...`);
  2583. try {
  2584. const growthInfoResponse = await fetch('https://baijiahao.baidu.com/cms-ui/rights/growth/get_info', {
  2585. method: 'GET',
  2586. headers,
  2587. });
  2588. if (growthInfoResponse.ok) {
  2589. const growthData = await growthInfoResponse.json() as {
  2590. errno?: number;
  2591. errmsg?: string;
  2592. data?: {
  2593. total_fans?: number;
  2594. };
  2595. };
  2596. logger.info(`[Baijiahao API] growth info response: errno=${growthData.errno}`);
  2597. if (growthData.errno === 0 && growthData.data) {
  2598. const fansCount = growthData.data.total_fans;
  2599. if (fansCount !== undefined && fansCount !== null) {
  2600. accountInfo.fansCount = fansCount;
  2601. fansCountFetched = true;
  2602. logger.info(`[Baijiahao API] Got fans count: ${accountInfo.fansCount}`);
  2603. } else {
  2604. logger.warn(`[Baijiahao API] growth info API returned no fans count`);
  2605. }
  2606. } else {
  2607. logger.warn(`[Baijiahao API] growth info API error: errno=${growthData.errno}, errmsg=${growthData.errmsg}`);
  2608. }
  2609. } else {
  2610. logger.warn(`[Baijiahao API] growth info request failed: ${growthInfoResponse.status}`);
  2611. }
  2612. } catch (growthError) {
  2613. logger.warn(`[Baijiahao API] Failed to fetch growth info (non-critical):`, growthError);
  2614. // 粉丝数获取失败不影响整体流程
  2615. }
  2616. // 3. 获取作品列表 (分页获取所有作品)
  2617. logger.info(`[Baijiahao API] Step 3: Fetching works list...`);
  2618. setTimeout(() => {
  2619. console.log('1000ms');
  2620. }, 1000);
  2621. const worksList: WorkItem[] = [];
  2622. let currentPage = 1;
  2623. const pageSize = 20;
  2624. let hasMore = true;
  2625. let totalWorks = 0;
  2626. let worksListError = false;
  2627. while (hasMore && !worksListError) {
  2628. try {
  2629. const listUrl = `https://baijiahao.baidu.com/pcui/article/lists?currentPage=${currentPage}&pageSize=${pageSize}&search=&type=&collection=&startDate=&endDate=&clearBeforeFetch=false&dynamic=0`;
  2630. logger.info(`[Baijiahao API] Fetching works page ${currentPage}...`);
  2631. logger.debug(`[Baijiahao API] Request headers include Cookie: ${!!headers.Cookie}, Cookie length: ${headers.Cookie?.length || 0}`);
  2632. const listResponse = await fetch(listUrl, {
  2633. method: 'GET',
  2634. headers,
  2635. });
  2636. if (!listResponse.ok) {
  2637. const errorText = await listResponse.text();
  2638. logger.warn(`[Baijiahao API] Failed to fetch works list page ${currentPage}: ${listResponse.status}`);
  2639. logger.warn(`[Baijiahao API] Error response body: ${errorText}`);
  2640. break;
  2641. }
  2642. const responseText = await listResponse.text();
  2643. logger.info(`[Baijiahao API] ========== Works API Response (Page ${currentPage}) ==========`);
  2644. logger.info(`[Baijiahao API] Full response: ${responseText}`);
  2645. logger.info(`[Baijiahao API] ============================================================`);
  2646. const listData = JSON.parse(responseText) as {
  2647. errno?: number;
  2648. errmsg?: string;
  2649. data?: {
  2650. list?: Array<{
  2651. id?: string;
  2652. article_id?: string;
  2653. title?: string;
  2654. cover_images?: string | string[];
  2655. created_at?: string;
  2656. create_time?: string;
  2657. status?: string;
  2658. read_amount?: number;
  2659. read_count?: number;
  2660. like_amount?: number;
  2661. like_count?: number;
  2662. comment_amount?: number;
  2663. comment_count?: number;
  2664. share_amount?: number;
  2665. share_count?: number;
  2666. }>;
  2667. page?: {
  2668. currentPage?: number;
  2669. pageSize?: number;
  2670. totalCount?: number;
  2671. totalPage?: number;
  2672. };
  2673. total?: number; // 兼容旧格式
  2674. };
  2675. };
  2676. // 处理分散认证问题 (errno=10001402),重试一次
  2677. if (listData.errno === 10001402) {
  2678. logger.warn(`[Baijiahao API] Dispersed authentication issue (errno=10001402) on page ${currentPage}, retrying after 3 seconds...`);
  2679. logger.debug(`[Baijiahao API] Request URL: ${listUrl}`);
  2680. logger.debug(`[Baijiahao API] Cookie header present: ${!!headers.Cookie}, length: ${headers.Cookie?.length || 0}`);
  2681. await new Promise(resolve => setTimeout(resolve, 3000));
  2682. // 重试一次,确保 headers 包含 Cookie
  2683. const retryResponse = await fetch(listUrl, {
  2684. method: 'GET',
  2685. headers: {
  2686. ...headers,
  2687. 'Cookie': cookieString, // 确保 Cookie 被正确传递
  2688. },
  2689. });
  2690. if (retryResponse.ok) {
  2691. const retryResponseText = await retryResponse.text();
  2692. logger.info(`[Baijiahao API] ========== Works API Retry Response (Page ${currentPage}) ==========`);
  2693. logger.info(`[Baijiahao API] Full retry response: ${retryResponseText}`);
  2694. logger.info(`[Baijiahao API] ============================================================`);
  2695. const retryData = JSON.parse(retryResponseText) as typeof listData;
  2696. if (retryData.errno === 0) {
  2697. logger.info(`[Baijiahao API] Retry successful for page ${currentPage}`);
  2698. Object.assign(listData, retryData);
  2699. } else if (retryData.errno === 10001402) {
  2700. logger.error(`[Baijiahao API] Retry still failed with errno=10001402, cookie may be invalid or expired`);
  2701. logger.error(`[Baijiahao API] Retry response data: ${JSON.stringify(retryData, null, 2)}`);
  2702. // 如果重试仍然失败,可能是 Cookie 问题,记录详细信息
  2703. logger.error(`[Baijiahao API] Cookie info: ${cookieString.substring(0, 200)}...`);
  2704. // 标记错误,但不完全失败,继续返回已获取的账号信息
  2705. worksListError = true;
  2706. logger.warn(`[Baijiahao API] Works list fetch failed, but will return other account info (name, fans count)`);
  2707. break;
  2708. } else {
  2709. logger.warn(`[Baijiahao API] Retry still failed: errno=${retryData.errno}, errmsg=${retryData.errmsg}`);
  2710. break;
  2711. }
  2712. } else {
  2713. logger.warn(`[Baijiahao API] Retry request failed: ${retryResponse.status}`);
  2714. break;
  2715. }
  2716. }
  2717. if (listData.errno !== 0) {
  2718. logger.warn(`[Baijiahao API] API returned error on page ${currentPage}: errno=${listData.errno}, errmsg=${listData.errmsg}`);
  2719. logger.warn(`[Baijiahao API] Error response data: ${JSON.stringify(listData, null, 2)}`);
  2720. // 如果不是 10001402 错误,标记为错误但继续返回其他信息
  2721. if (listData.errno !== 10001402) {
  2722. worksListError = true;
  2723. logger.warn(`[Baijiahao API] Works list fetch failed with errno=${listData.errno}, but will return other account info`);
  2724. }
  2725. break;
  2726. }
  2727. const list = listData.data?.list || [];
  2728. // 优先使用 data.page.totalCount,如果没有则使用 data.total(兼容旧格式)
  2729. totalWorks = listData.data?.page?.totalCount || listData.data?.total || 0;
  2730. logger.info(`[Baijiahao API] Got ${list.length} works on page ${currentPage}, total: ${totalWorks}`);
  2731. for (const item of list) {
  2732. const coverUrl = extractPlatformWorkCoverUrl(item);
  2733. worksList.push({
  2734. videoId: item.id || item.article_id || `bjh_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
  2735. title: item.title || '',
  2736. coverUrl: coverUrl,
  2737. videoUrl: (item as any).url || (item as any).article_url || '',
  2738. duration: '00:00',
  2739. publishTime: item.created_at || item.create_time || new Date().toISOString(),
  2740. status: item.status || 'published',
  2741. playCount: item.read_amount || item.read_count || 0,
  2742. likeCount: item.like_amount || item.like_count || 0,
  2743. commentCount: item.comment_amount || item.comment_count || 0,
  2744. shareCount: item.share_amount || item.share_count || 0,
  2745. });
  2746. }
  2747. // 检查是否还有更多
  2748. if (list.length < pageSize) {
  2749. hasMore = false;
  2750. logger.info(`[Baijiahao API] No more works, stopping at page ${currentPage}`);
  2751. } else {
  2752. currentPage++;
  2753. // 防止无限循环,最多获取 10 页(200 个作品)
  2754. if (currentPage > 10) {
  2755. logger.warn(`[Baijiahao API] Reached max pages (10), stopping. Total fetched: ${worksList.length}, API total: ${totalWorks}`);
  2756. hasMore = false;
  2757. }
  2758. }
  2759. } catch (pageError) {
  2760. logger.error(`[Baijiahao API] Error fetching page ${currentPage}:`, pageError);
  2761. break;
  2762. }
  2763. }
  2764. accountInfo.worksList = worksList;
  2765. // 使用 API 返回的 total 字段作为作品总数,而不是已获取的作品列表长度
  2766. // 因为可能只获取了部分作品(最多 10 页),但 total 是真实的总数
  2767. if (totalWorks > 0) {
  2768. accountInfo.worksCount = totalWorks;
  2769. worksCountFetched = true;
  2770. } else if (worksList.length > 0) {
  2771. // 如果 API 没有返回 total,但获取到了作品列表,使用列表长度
  2772. accountInfo.worksCount = worksList.length;
  2773. worksCountFetched = true;
  2774. }
  2775. if (worksListError) {
  2776. logger.warn(`[Baijiahao API] Works list fetch encountered errors, but returning partial account info`);
  2777. logger.info(`[Baijiahao API] Account info (partial): name=${accountInfo.accountName}, fans=${accountInfo.fansCount} (fetched: ${fansCountFetched}), works=${accountInfo.worksCount} (fetched: ${worksCountFetched})`);
  2778. } else {
  2779. logger.info(`[Baijiahao API] Successfully fetched account info: name=${accountInfo.accountName}, fans=${accountInfo.fansCount} (fetched: ${fansCountFetched}), works=${accountInfo.worksCount} (fetched: ${worksCountFetched}, API total: ${totalWorks}, fetched list: ${worksList.length})`);
  2780. }
  2781. return accountInfo;
  2782. } catch (error) {
  2783. logger.error(`[Baijiahao API] Failed to fetch account info:`, error);
  2784. // 返回默认信息,但保留已获取的部分数据
  2785. return accountInfo;
  2786. }
  2787. }
  /**
   * Build a placeholder AccountInfo for a platform.
   *
   * The account id is the platform key plus the current timestamp, the account
   * name is the platform's Chinese display name (or the raw platform key when
   * no display name is registered) followed by "账号". The avatar is empty, the
   * fan count is left undefined and the works count is 0.
   *
   * @param platform Platform key to build the placeholder for.
   * @returns The placeholder account info.
   */
  private getDefaultAccountInfo(platform: PlatformType): AccountInfo {
    // Human-friendly display names, keyed by platform identifier.
    const displayNames: Record<string, string> = {
      douyin: '抖音',
      xiaohongshu: '小红书',
      kuaishou: '快手',
      weixin_video: '视频号',
      bilibili: 'B站',
      toutiao: '头条',
      baijiahao: '百家号',
    };

    // Fall back to the raw platform key when no display name is registered.
    const label = displayNames[platform] || platform;
    const createdAt = Date.now();

    const placeholder: AccountInfo = {
      accountId: `${platform}_${createdAt}`,
      accountName: `${label}账号`,
      avatarUrl: '',
      fansCount: undefined,
      worksCount: 0,
    };
    return placeholder;
  }
  /**
   * Fetch Douyin comments by driving the creator-center comment page: open the
   * "选择作品" picker, select each listed work in turn and scrape the comments
   * rendered for it.
   *
   * Any failure is logged and whatever has been collected so far is returned.
   * The browser is closed on every exit path; a failure while closing is logged
   * instead of replacing the result.
   *
   * @param cookies Douyin login cookies; entries without a domain default to `.douyin.com`.
   * @returns One entry per work for which at least one comment was scraped.
   */
  async fetchDouyinComments(cookies: CookieData[]): Promise<WorkComments[]> {
    const browser = await launchBrowser({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });
    const allWorkComments: WorkComments[] = [];

    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      });

      // Install the login cookies before any navigation happens.
      const playwrightCookies = this.normalizePlaywrightCookies(cookies.map(c => ({
        name: c.name,
        value: c.value,
        domain: c.domain || '.douyin.com',
        path: c.path || '/',
      })));
      await context.addCookies(playwrightCookies);

      const page = await context.newPage();

      // Open the comment management page.
      logger.info('Navigating to Douyin comment management page...');
      await page.goto('https://creator.douyin.com/creator-micro/interactive/comment', {
        waitUntil: 'domcontentloaded',
        timeout: 30000,
      });
      await page.waitForTimeout(3000);

      // Open the work picker.
      logger.info('Looking for "选择作品" button...');
      const selectWorkBtn = await page.$('button:has-text("选择作品"), [class*="select"]:has-text("选择作品"), div:has-text("选择作品")');
      if (!selectWorkBtn) {
        // Without a picker button a work may already be selected: scrape what is shown.
        logger.info('No "选择作品" button found, fetching current comments...');
        const currentComments = await this.extractCommentsFromPage(page);
        if (currentComments.length > 0) {
          allWorkComments.push({
            videoId: 'current',
            videoTitle: '当前作品',
            videoCoverUrl: '',
            comments: currentComments,
          });
        }
        return allWorkComments;
      }
      await selectWorkBtn.click();
      await page.waitForTimeout(2000);
      logger.info('Clicked "选择作品" button');

      // Read the list of works offered by the picker dialog.
      const worksList = await page.evaluate(() => {
        const works: Array<{ videoId: string; title: string; coverUrl: string }> = [];
        const workItems = document.querySelectorAll('[class*="video-card"], [class*="work-item"], [class*="content-item"]');
        workItems.forEach((item, index) => {
          const titleEl = item.querySelector('[class*="title"], [class*="desc"]');
          const coverEl = item.querySelector('img, [class*="cover"]');
          const title = titleEl?.textContent?.trim() || `作品 ${index + 1}`;
          let coverUrl = '';
          if (coverEl) {
            // Prefer the <img> source, otherwise a CSS background-image URL.
            coverUrl = (coverEl as HTMLImageElement).src ||
              coverEl.getAttribute('style')?.match(/url\(['"]?([^'")\s]+)['"]?\)/)?.[1] || '';
          }
          // Take the id from data attributes, else fall back to a positional id.
          const videoId = item.getAttribute('data-video-id') ||
            item.getAttribute('data-id') ||
            `video_${index}`;
          works.push({ videoId, title, coverUrl });
        });
        return works;
      });
      logger.info(`Found ${worksList.length} works in the selector`);

      if (worksList.length > 0) {
        // Select every work in turn and scrape its comments.
        for (let i = 0; i < worksList.length; i++) {
          const work = worksList[i];
          try {
            logger.info(`Processing work ${i + 1}/${worksList.length}: ${work.title}`);

            const workItem = await page.$(`[class*="video-card"]:nth-child(${i + 1}), [class*="work-item"]:nth-child(${i + 1})`);
            if (workItem) {
              await workItem.click();
              await page.waitForTimeout(2000);
              // Some dialogs need an explicit confirmation.
              const confirmBtn = await page.$('button:has-text("确定"), button:has-text("确认")');
              if (confirmBtn) {
                await confirmBtn.click();
                await page.waitForTimeout(2000);
              }
            }

            // Give the comment list time to load.
            await page.waitForTimeout(2000);

            const comments = await this.extractCommentsFromPage(page);
            if (comments.length > 0) {
              allWorkComments.push({
                videoId: work.videoId,
                videoTitle: work.title,
                videoCoverUrl: work.coverUrl,
                comments,
              });
              logger.info(`Extracted ${comments.length} comments for work: ${work.title}`);
            }

            // Re-open the picker when more works remain.
            if (i < worksList.length - 1) {
              const selectBtn = await page.$('button:has-text("选择作品"), [class*="select"]:has-text("选择作品")');
              if (selectBtn) {
                await selectBtn.click();
                await page.waitForTimeout(2000);
              }
            }
          } catch (workError) {
            // A failure on one work must not abort the remaining ones.
            logger.warn(`Failed to process work ${i + 1}:`, workError);
          }
        }
      } else {
        // No works listed: fall back to whatever the page currently shows.
        const comments = await this.extractCommentsFromPage(page);
        if (comments.length > 0) {
          const currentWork = await page.evaluate(() => {
            const titleEl = document.querySelector('[class*="video-title"], [class*="content-title"]');
            const coverEl = document.querySelector('[class*="video-cover"] img, [class*="cover"] img');
            return {
              title: titleEl?.textContent?.trim() || '当前作品',
              coverUrl: (coverEl as HTMLImageElement)?.src || '',
            };
          });
          allWorkComments.push({
            videoId: 'current',
            videoTitle: currentWork.title,
            videoCoverUrl: currentWork.coverUrl,
            comments,
          });
        }
      }

      logger.info(`Total: fetched comments from ${allWorkComments.length} works`);
    } catch (error) {
      logger.error('Error fetching Douyin comments:', error);
    } finally {
      // Single cleanup point for every exit path; closing the browser also
      // disposes of its contexts and pages.
      try {
        await browser.close();
      } catch (closeError) {
        logger.warn('Failed to close browser after fetching Douyin comments:', closeError);
      }
    }

    return allWorkComments;
  }
  /**
   * Scrape the comments currently rendered on the page.
   *
   * The selectors target the hashed class names of the Douyin creator center:
   * - comment container: `container-sXKyMs` (or a similar `container-xxx`)
   * - user name:         `username-aLgaNB` (or a similar `username-xxx`)
   * - time:              `time-NRtTXO` (or a similar `time-xxx`)
   * - comment text:      `comment-content-text-JvmAKq` (or similar)
   * - avatar:            `avatar-BRKDsF` (or a similar `avatar-xxx`)
   *
   * Comments with empty text are skipped and duplicates (same author name and
   * text) are dropped. The author name doubles as the author id.
   *
   * @param page Page whose DOM is inspected.
   * @returns The comments found, in DOM order.
   */
  private async extractCommentsFromPage(page: Page): Promise<CommentItem[]> {
    // NOTE: everything inside the callback runs in the browser, not in Node.
    return page.evaluate(() => {
      const extracted: Array<{
        commentId: string;
        authorId: string;
        authorName: string;
        authorAvatar: string;
        content: string;
        likeCount: number;
        commentTime: string;
      }> = [];
      const dedupeKeys = new Set<string>();

      // Strategy 1: every `container-` element that holds both a user name and a comment text.
      const rows: Element[] = [];
      const candidates = Array.from(document.querySelectorAll('[class*="container-"]'));
      for (const candidate of candidates) {
        const nameProbe = candidate.querySelector('[class*="username-"]');
        const textProbe = candidate.querySelector('[class*="comment-content-text-"]');
        if (nameProbe && textProbe) {
          rows.push(candidate);
        }
      }
      console.log(`Found ${rows.length} comment containers`);

      // Strategy 2: start from the comment text nodes and walk up to the nearest `container-` ancestor.
      if (rows.length === 0) {
        const textNodes = Array.from(document.querySelectorAll('[class*="comment-content-text-"]'));
        console.log(`Found ${textNodes.length} content elements, searching parents...`);
        for (const textNode of textNodes) {
          let ancestor = textNode.parentElement;
          let depth = 0;
          // Climb at most 10 levels.
          while (depth < 10 && ancestor) {
            const classes = ancestor.className || '';
            if (classes.includes('container-')) {
              if (!rows.includes(ancestor)) {
                rows.push(ancestor);
              }
              break;
            }
            ancestor = ancestor.parentElement;
            depth++;
          }
        }
      }
      console.log(`Total comment containers: ${rows.length}`);

      for (let idx = 0; idx < rows.length; idx++) {
        const row = rows[idx];
        try {
          // Author name, with a placeholder when missing.
          const nameNode = row.querySelector('[class*="username-"]');
          const authorName = (nameNode && nameNode.textContent ? nameNode.textContent.trim() : '') || '未知用户';

          // Avatar: the <img> inside the avatar wrapper.
          const avatarImg = row.querySelector('[class*="avatar-"]')?.querySelector('img');
          const authorAvatar = avatarImg ? avatarImg.src || '' : '';

          // Time label exactly as displayed.
          const timeNode = row.querySelector('[class*="time-"]');
          const commentTime = timeNode && timeNode.textContent ? timeNode.textContent.trim() : '';

          // Comment text.
          const textNode = row.querySelector('[class*="comment-content-text-"]');
          const content = textNode && textNode.textContent ? textNode.textContent.trim() : '';
          if (!content || content.length < 1) {
            console.log(`[${idx}] Skipping empty content`);
            continue;
          }

          // De-duplicate on author name + text.
          const dedupeKey = `${authorName}||${content}`;
          if (dedupeKeys.has(dedupeKey)) {
            console.log(`[${idx}] Skipping duplicate: ${authorName} - ${content.slice(0, 20)}`);
            continue;
          }
          dedupeKeys.add(dedupeKey);

          // Like count: first number in the first `item-` of the operations bar.
          const likeNode = row.querySelector('[class*="operations-"]')?.querySelector('[class*="item-"]');
          const likeDigits = likeNode ? (likeNode.textContent || '').match(/(\d+)/) : null;
          const likeCount = likeDigits ? parseInt(likeDigits[1], 10) : 0;

          // Synthetic id derived from text prefix + author + time.
          // NOTE(review): only the first 20 base64 characters are kept, i.e. the first
          // 15 bytes of the URL-encoded string, so comments sharing a short text prefix
          // get the same id. Left as is because ids may already be persisted - confirm.
          const idSeed = content.slice(0, 30) + authorName + commentTime;
          const commentId = `dy_${btoa(encodeURIComponent(idSeed)).slice(0, 20)}`;

          extracted.push({
            commentId,
            authorId: authorName,
            authorName,
            authorAvatar,
            content,
            likeCount,
            commentTime,
          });
          console.log(`[${idx}] Extracted: ${authorName} - ${content.slice(0, 30)} (${commentTime})`);
        } catch (err) {
          console.error(`[${idx}] Error extracting comment:`, err);
        }
      }

      console.log(`Successfully extracted ${extracted.length} comments`);
      return extracted;
    });
  }
  /**
   * Fetch Douyin comments by intercepting creator-center API traffic in a
   * headless browser (the recommended path).
   *
   * Flow:
   *  1. Install the cookies and attach a response listener that records
   *     work-list and comment-list API payloads.
   *  2. Open the creator home page; landing on a login/passport URL is treated
   *     as an expired cookie and yields an empty result.
   *  3. Resolve the work list via `fetchWorksDirectApi`; if that returns
   *     nothing, open the comment page and its "选择作品" picker and use the
   *     works captured by the listener.
   *  4. For every work with `commentCount > 0`, use already captured comments
   *     or `fetchCommentsDirectApi`.
   *  5. For works still without comments, drive the comment page: pick each
   *     work in the picker by its index in the work list and take the comment
   *     response that follows (or scrape the DOM as a last resort).
   *
   * Errors are logged and whatever has been collected is returned; the browser
   * is always closed.
   *
   * @param cookies Douyin login cookies; entries without a domain default to `.douyin.com`.
   * @returns One entry per work for which at least one comment was obtained.
   */
  async fetchDouyinCommentsByApiInterception(cookies: CookieData[]): Promise<WorkComments[]> {
    // Minimal structural view of the response object passed to the listeners.
    type InterceptedResponse = { url(): string; json(): Promise<any> };
    type CapturedWork = {
      awemeId: string;
      title: string;
      coverUrl: string;
      playCount: number;
      likeCount: number;
      commentCount: number;
      shareCount: number;
      collectCount: number;
      videoUrl?: string;
      createTime?: number;
    };
    type WorkRef = { awemeId: string; title: string; coverUrl: string };
    type ParsedComments = { awemeId: string; format: string; comments: CommentItem[] };

    const COMMENT_PAGE_URL = 'https://creator.douyin.com/creator-micro/interactive/comment';

    const browser = await launchBrowser({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });

    const allWorkComments: WorkComments[] = [];
    // Index of the entries in allWorkComments, so a work is never added twice.
    const collectedByAwemeId = new Map<string, WorkComments>();
    // Data recorded by the network listener.
    const capturedWorks: CapturedWork[] = [];
    const capturedWorkIds = new Set<string>();
    const capturedComments: Map<string, CommentItem[]> = new Map();

    /** Add a result entry for a work and remember that it is done. */
    const addWorkComments = (work: WorkRef, comments: CommentItem[]): void => {
      const entry: WorkComments = {
        videoId: work.awemeId,
        videoTitle: work.title,
        videoCoverUrl: work.coverUrl,
        comments,
      };
      allWorkComments.push(entry);
      collectedByAwemeId.set(work.awemeId, entry);
    };

    /** Record a captured work; entries without an id and repeats are ignored. */
    const rememberWork = (work: CapturedWork): void => {
      if (!work.awemeId || capturedWorkIds.has(work.awemeId)) return;
      capturedWorkIds.add(work.awemeId);
      capturedWorks.push(work);
    };

    /** Parse a numeric counter that may arrive as a number or a string. */
    const toCount = (value: unknown): number => parseInt(String(value || '0'), 10);

    /** Convert a timestamp in seconds to ISO-8601; unusable values map to the epoch. */
    const toIsoTime = (raw: unknown): string => {
      const date = new Date(Number(raw || 0) * 1000);
      return (Number.isNaN(date.getTime()) ? new Date(0) : date).toISOString();
    };

    /**
     * Parse a comment-list payload. Two shapes are handled:
     *  - `{ comments: [...] }`           (initial load, /comment/list/select/)
     *  - `{ comment_info_list: [...] }`  (after switching works, /creator/comment/list/)
     * When both are present the second one wins.
     */
    const parseCommentPayload = (data: any, urlAwemeId: string): ParsedComments => {
      let awemeId = urlAwemeId;
      let format = '';
      let comments: CommentItem[] = [];

      if (Array.isArray(data?.comments) && data.comments.length > 0) {
        // The URL may lack aweme_id; the comments themselves carry it in this shape.
        const firstAwemeId = data.comments[0]?.aweme_id;
        if (!awemeId && firstAwemeId) {
          awemeId = String(firstAwemeId);
        }
        comments = data.comments.map((c: Record<string, unknown>) => {
          const user = c.user as Record<string, unknown> | undefined;
          const avatarThumb = user?.avatar_thumb as Record<string, unknown> | undefined;
          const avatarUrls = avatarThumb?.url_list as string[] | undefined;
          return {
            commentId: String(c.cid || ''),
            authorId: String(user?.uid || ''),
            authorName: String(user?.nickname || '匿名'),
            authorAvatar: avatarUrls?.[0] || '',
            content: String(c.text || ''),
            likeCount: Number(c.digg_count || 0),
            commentTime: toIsoTime(c.create_time),
            videoId: String(c.aweme_id || awemeId),
          };
        });
        format = 'comments';
      }

      if (Array.isArray(data?.comment_info_list) && data.comment_info_list.length > 0) {
        comments = data.comment_info_list.map((c: Record<string, unknown>) => {
          const userInfo = c.user_info as Record<string, unknown> | undefined;
          return {
            commentId: String(c.comment_id || ''),
            authorId: String(userInfo?.user_id || ''),
            authorName: String(userInfo?.screen_name || '匿名'),
            authorAvatar: String(userInfo?.avatar_url || ''),
            content: String(c.text || ''),
            likeCount: Number(c.digg_count || 0),
            commentTime: toIsoTime(c.create_time),
            // This shape does not carry aweme_id; only the URL can provide it.
            videoId: awemeId,
          };
        });
        format = 'comment_info_list';
      }

      return { awemeId, format, comments };
    };

    const isCommentListUrl = (url: string): boolean =>
      url.includes('/comment/list') || url.includes('/comment/read');

    /** Listener active for the whole session: records works and comments. */
    const douyinApiResponseHandler = async (response: InterceptedResponse): Promise<void> => {
      const url = response.url();
      // Work list: new endpoint /janus/douyin/creator/pc/work_list (aweme_list),
      // legacy endpoint /creator/item/list (item_info_list).
      const isWorkList = url.includes('/work_list') || url.includes('/creator/item/list');
      const isCommentList = isCommentListUrl(url);
      if (!isWorkList && !isCommentList) return;

      try {
        if (isWorkList) {
          const data = await response.json();

          if (data?.aweme_list && data.aweme_list.length > 0) {
            for (const aweme of data.aweme_list) {
              const awemeId = String(aweme.aweme_id || '');
              if (!awemeId) continue;
              const statistics = aweme.statistics || {};

              let title = aweme.item_title || '';
              if (!title) {
                // Fall back to the first line of the description.
                const desc = aweme.desc || aweme.caption || '';
                title = desc.split('\n')[0].slice(0, 50) || '无标题';
              }

              let coverUrl = '';
              if (aweme.Cover?.url_list?.length > 0) {
                coverUrl = aweme.Cover.url_list[0];
              } else if (aweme.video?.cover?.url_list?.length > 0) {
                coverUrl = aweme.video.cover.url_list[0];
              }

              rememberWork({
                awemeId,
                title,
                coverUrl,
                playCount: toCount(statistics.play_count),
                likeCount: toCount(statistics.digg_count),
                commentCount: toCount(statistics.comment_count),
                shareCount: toCount(statistics.share_count),
                collectCount: toCount(statistics.collect_count),
                videoUrl: '',
                createTime: Number(aweme.create_time || 0) || undefined,
              });
            }
            logger.info(`[API] Captured ${data.aweme_list.length} works from work_list API`);
          }

          const itemList = data?.item_info_list || data?.item_list || [];
          if (itemList.length > 0) {
            for (const item of itemList) {
              rememberWork({
                // Stringified so the id compares equal to the keys used for captured comments.
                awemeId: String(item.item_id_plain || item.aweme_id || ''),
                title: item.title || '无标题',
                coverUrl: item.cover_image_url || '',
                playCount: item.play_count || 0,
                likeCount: item.like_count || 0,
                commentCount: item.comment_count || 0,
                shareCount: item.share_count || 0,
                collectCount: item.collect_count || 0,
                videoUrl: '',
                createTime: Number(item.create_time || 0) || undefined,
              });
            }
            logger.info(`[API] Captured ${itemList.length} works from item/list API`);
          }
        }

        if (isCommentList) {
          const data = await response.json();
          const urlAwemeId = url.match(/aweme_id=(\d+)/)?.[1] || '';
          const parsed = parseCommentPayload(data, urlAwemeId);
          if (parsed.comments.length > 0) {
            logger.info(`[API] Format (${parsed.format}): Captured ${parsed.comments.length} comments`);
            const videoId = parsed.comments[0]?.videoId || urlAwemeId;
            if (videoId) {
              // The same response can be observed more than once; keep each comment id once.
              const existing = capturedComments.get(videoId) || [];
              const knownIds = new Set(existing.map(c => c.commentId));
              const fresh = parsed.comments.filter(c => !c.commentId || !knownIds.has(c.commentId));
              capturedComments.set(videoId, [...existing, ...fresh]);
              logger.info(`[API] Total captured ${parsed.comments.length} comments for aweme ${videoId}`);
            }
          }
        }
      } catch (error) {
        // Typically a non-JSON body; nothing to capture from it.
        logger.debug(`[API] Ignored response from ${url}: ${error instanceof Error ? error.message : String(error)}`);
      }
    };

    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      });

      // Install the login cookies.
      const playwrightCookies = this.normalizePlaywrightCookies(cookies.map(c => ({
        name: c.name,
        value: c.value,
        domain: c.domain || '.douyin.com',
        path: c.path || '/',
      })));
      await context.addCookies(playwrightCookies);
      logger.info(`[API Interception] Set ${playwrightCookies.length} cookies`);

      const page = await context.newPage();
      page.on('response', douyinApiResponseHandler);

      // Open the creator center with the cookies in place.
      logger.info('[API Interception] Navigating to creator page...');
      await page.goto('https://creator.douyin.com/creator-micro/home', {
        waitUntil: 'domcontentloaded',
        timeout: 60000,
      });
      await page.waitForTimeout(3000);

      // A redirect to a login page means the cookies are no longer valid.
      const currentUrl = page.url();
      if (currentUrl.includes('login') || currentUrl.includes('passport')) {
        logger.warn('[API Interception] Cookie expired');
        return [];
      }

      // Option 1 (preferred): ask the work-list API directly.
      logger.info('[API Interception] Fetching works via direct API...');
      const apiResult = await this.fetchWorksDirectApi(page);
      let works = apiResult.works;

      // Option 2: let the page trigger the work-list API and use what the listener captured.
      if (works.length === 0) {
        logger.info('[API Interception] Direct API failed, trying page navigation...');
        await page.goto(COMMENT_PAGE_URL, {
          waitUntil: 'networkidle',
          timeout: 60000,
        });
        await page.waitForTimeout(3000);
        const selectBtn = await page.$('button:has-text("选择作品"), [class*="select"]:has-text("选择")');
        if (selectBtn) {
          await selectBtn.click();
          await page.waitForTimeout(3000);
        }
        works = capturedWorks;
      }
      logger.info(`[API Interception] Total works: ${works.length}`);

      const worksWithComments = works.filter(w => w.commentCount > 0);
      logger.info(`[API Interception] Works with comments: ${worksWithComments.length}`);

      if (worksWithComments.length > 0) {
        // First pass: captured comments or the direct comment API.
        for (const work of worksWithComments) {
          if (collectedByAwemeId.has(work.awemeId)) continue;
          logger.info(`[API Interception] Trying direct API for: ${work.title.slice(0, 30)}... (${work.commentCount} comments)`);
          let comments = capturedComments.get(work.awemeId) || [];
          if (comments.length === 0) {
            comments = await this.fetchCommentsDirectApi(page, work.awemeId);
          }
          if (comments.length > 0) {
            addWorkComments(work, comments);
            logger.info(`[API Interception] Got ${comments.length} comments for ${work.awemeId} via direct API`);
          }
        }

        // Second pass: page interaction for works that are still empty.
        const worksNeedingPageMethod = worksWithComments.filter(w => !collectedByAwemeId.has(w.awemeId));
        if (worksNeedingPageMethod.length > 0) {
          logger.info(`[API Interception] ${worksNeedingPageMethod.length} works need page interaction method`);
          // `works` holds every work (with or without comments); the picker dialog is
          // expected to list them in the same order, so positions in `works` are used
          // as positions in the dialog.
          logger.info(`[Page Method] All works: ${works.length}, works with comments: ${worksWithComments.length}`);
          logger.info(`[Page Method] Works with comments IDs: ${worksWithComments.map(w => w.awemeId).join(', ')}`);

          const workIndexMap = new Map<string, number>();
          works.forEach((w, idx) => {
            if (w.commentCount > 0) {
              workIndexMap.set(w.awemeId, idx);
            }
          });
          logger.info(`[Page Method] Work index map: ${JSON.stringify(Object.fromEntries(workIndexMap))}`);

          // Holds the most recent comment response seen by the listener below.
          const latestHolder: { comments: CommentItem[]; awemeId: string } = { comments: [], awemeId: '' };
          const douyinCommentHandler = async (response: InterceptedResponse): Promise<void> => {
            const url = response.url();
            if (!isCommentListUrl(url)) return;
            try {
              const jsonData = await response.json();
              const urlAwemeId = url.match(/aweme_id=(\d+)/)?.[1] || '';
              const parsed = parseCommentPayload(jsonData, urlAwemeId);
              if (parsed.comments.length > 0) {
                logger.info(`[Comment API] Format (${parsed.format}): ${parsed.comments.length} comments, aweme_id: ${parsed.awemeId}`);
                latestHolder.comments = parsed.comments;
                latestHolder.awemeId = parsed.awemeId;
              }
            } catch (error) {
              logger.debug(`[Comment API] Ignored response from ${url}: ${error instanceof Error ? error.message : String(error)}`);
            }
          };
          // Attach BEFORE navigating, otherwise the comment request fired during
          // page load is over before the listener exists.
          page.on('response', douyinCommentHandler);

          logger.info('[Page Method] Navigating to comment management page...');
          await page.goto(COMMENT_PAGE_URL, {
            waitUntil: 'domcontentloaded',
            timeout: 60000,
          });
          // Let the page settle, then allow the first work's comment response to arrive.
          await page.waitForTimeout(5000);
          await page.waitForTimeout(4000);

          // The work shown on load identifies itself through aweme_id.
          if (latestHolder.comments.length > 0 && latestHolder.awemeId) {
            const matchedWork = works.find(w => w.awemeId === latestHolder.awemeId);
            // Do not add a second entry for a work the direct API already covered.
            if (matchedWork && !collectedByAwemeId.has(matchedWork.awemeId)) {
              addWorkComments(matchedWork, latestHolder.comments);
              logger.info(`[Page Method] First work (by aweme_id): ${latestHolder.comments.length} comments for ${matchedWork.awemeId}`);
            }
          }

          // Walk through the remaining works with comments.
          for (const work of worksWithComments) {
            const existing = collectedByAwemeId.get(work.awemeId);
            if (existing) {
              logger.info(`[Page Method] Skip ${work.awemeId}, already got ${existing.comments.length} comments`);
              continue;
            }
            const workIndex = workIndexMap.get(work.awemeId);
            if (workIndex === undefined) {
              logger.warn(`[Page Method] Work ${work.awemeId} not found in index map`);
              continue;
            }
            logger.info(`[Page Method] Processing work: ${work.title.slice(0, 20)}... (awemeId: ${work.awemeId}, index: ${workIndex})`);

            try {
              // Forget the previous work's response.
              latestHolder.comments = [];
              latestHolder.awemeId = '';

              await page.click('button:has-text("选择作品")');
              await page.waitForTimeout(2000);

              // Cover images inside the picker dialog, one per work.
              const workImages = await page.$$('[role="dialog"] img[src*="douyinpic"], .douyin-creator-interactive-sidesheet-inner img[src*="douyinpic"]');
              logger.info(`[Page Method] Found ${workImages.length} work images in dialog`);

              if (workIndex >= workImages.length) {
                logger.warn(`[Page Method] Index ${workIndex} out of range, only ${workImages.length} images`);
                await page.keyboard.press('Escape');
              } else {
                await workImages[workIndex].click();
                logger.info(`[Page Method] Clicked work image at index ${workIndex}`);
                // Wait for the comment API response triggered by the selection.
                await page.waitForTimeout(4000);

                if (latestHolder.comments.length > 0) {
                  // This response shape may lack aweme_id, so stamp the selected work's id.
                  const comments = latestHolder.comments.map(c => ({
                    ...c,
                    videoId: work.awemeId,
                  }));
                  addWorkComments(work, comments);
                  logger.info(`[Page Method] Got ${comments.length} comments for ${work.awemeId}`);
                } else {
                  // Last resort: scrape the rendered comments.
                  const pageComments = await this.extractCommentsFromPage(page);
                  if (pageComments.length > 0) {
                    const comments = pageComments.map(c => ({ ...c, videoId: work.awemeId }));
                    addWorkComments(work, comments);
                    logger.info(`[Page Method] Extracted ${comments.length} comments from page`);
                  } else {
                    logger.warn(`[Page Method] No comments for ${work.awemeId}`);
                  }
                }
              }
              await page.waitForTimeout(1000);
            } catch (e) {
              logger.warn(`[Page Method] Error for work ${work.awemeId}:`, e);
              // Best effort: dismiss a dialog that may still be open.
              await page.keyboard.press('Escape').catch(() => { });
              await page.waitForTimeout(500);
            }
          }

          page.off('response', douyinCommentHandler);
        }
      }

      logger.info(`[API Interception] Total result: ${allWorkComments.length} works with comments`);
      page.off('response', douyinApiResponseHandler);
      await context.close();
    } catch (error) {
      logger.error('[API Interception] Error:', error);
    } finally {
      await browser.close();
    }

    return allWorkComments;
  }
  /**
   * Fetches the comments of one work by calling the comment APIs directly.
   *
   * The video-page API is tried first. Only when it yields zero comments is
   * the creator-centre comment API used as a fallback (the original author
   * notes that the fallback may return only part of the comments).
   *
   * @param page - Browser page handed to the underlying fetchers.
   * @param awemeId - ID of the work whose comments are requested.
   * @returns The comments from the first strategy that produced any, or the
   *   fallback's result (possibly empty).
   */
  private async fetchCommentsDirectApi(page: Page, awemeId: string): Promise<CommentItem[]> {
    logger.info(`[DirectAPI] Fetching ALL comments for ${awemeId} via video page API...`);
    const fromVideoPage = await this.fetchVideoPageCommentsApi(page, awemeId);

    // Nothing from the primary strategy: hand over to the creator API.
    if (fromVideoPage.length === 0) {
      logger.info(`[DirectAPI] Video page API returned 0 comments, trying creator API...`);
      return this.fetchCreatorCommentsDirectApi(page, awemeId);
    }

    logger.info(`[DirectAPI] Got ${fromVideoPage.length} comments via video page API`);
    return fromVideoPage;
  }
  /**
   * Fetches the comments of one work through the video-page comment list API
   * (primary strategy).
   *
   * Navigates `page` to `https://www.douyin.com/video/<awemeId>` and then pages
   * through `/aweme/v1/web/comment/list/` using an in-page `fetch` with
   * `credentials: 'include'`. At most 50 pages of 50 comments are requested.
   * Pagination stops when the API reports no more data, returns an empty
   * page, or hands back a cursor that did not advance.
   *
   * Side effect: `page` is left on the video page.
   *
   * @param page - Browser page used for the navigation and in-page requests.
   * @param awemeId - ID of the work whose comments are requested.
   * @returns The comments collected so far. Failures are logged rather than
   *   thrown, so the result may be partial or empty.
   */
  private async fetchVideoPageCommentsApi(page: Page, awemeId: string): Promise<CommentItem[]> {
    const comments: CommentItem[] = [];
    const maxPages = 50;
    const pageSize = 50;
    let cursor = 0;
    let hasMore = true;
    let pageCount = 0;

    // A malformed create_time would make Date#toISOString throw a RangeError
    // and abort the whole pagination. Fall back to the epoch, which is also
    // what a missing timestamp maps to.
    const toIsoTime = (seconds: unknown): string => {
      const date = new Date(Number(seconds || 0) * 1000);
      return Number.isNaN(date.getTime()) ? new Date(0).toISOString() : date.toISOString();
    };

    try {
      // Open the video page before issuing the in-page API requests.
      logger.info(`[VideoPageAPI] Navigating to video page for ${awemeId}...`);
      await page.goto(`https://www.douyin.com/video/${awemeId}`, {
        waitUntil: 'domcontentloaded',
        timeout: 30000,
      });
      await page.waitForTimeout(2000);

      while (hasMore && pageCount < maxPages) {
        pageCount++;
        const data = await page.evaluate(async ({ videoId, cursorValue, count }) => {
          try {
            const url = `https://www.douyin.com/aweme/v1/web/comment/list/?aweme_id=${videoId}&cursor=${cursorValue}&count=${count}&item_type=0&insert_ids=&whale_cut_token=&cut_version=1&rcFT=&aid=6383&device_platform=web_pc&verifyFp=&fp=&msToken=`;
            const resp = await fetch(url, {
              method: 'GET',
              credentials: 'include',
              headers: {
                'Accept': 'application/json, text/plain, */*',
                'Referer': `https://www.douyin.com/video/${videoId}`,
              },
            });
            if (!resp.ok) {
              return { error: `HTTP ${resp.status}`, comments: [] };
            }
            return resp.json();
          } catch (e) {
            // Report the failure as data so the caller can log it.
            return { error: String(e), comments: [] };
          }
        }, { videoId: awemeId, cursorValue: cursor, count: pageSize });

        if (data?.error) {
          logger.warn(`[VideoPageAPI] API error for ${awemeId}: ${data.error}`);
        }

        const gotArray = Array.isArray(data?.comments);
        if (gotArray) {
          for (const c of data.comments) {
            comments.push({
              commentId: String(c.cid || ''),
              authorId: String(c.user?.uid || c.user?.sec_uid || ''),
              authorName: String(c.user?.nickname || '匿名'),
              authorAvatar: c.user?.avatar_thumb?.url_list?.[0] || '',
              content: String(c.text || ''),
              likeCount: Number(c.digg_count || 0),
              commentTime: toIsoTime(c.create_time),
              videoId: String(c.aweme_id || awemeId),
            });
          }
          logger.info(`[VideoPageAPI] Page ${pageCount}: got ${data.comments.length} comments for ${awemeId}`);
        }

        // An empty or missing page always ends the pagination.
        const gotComments = gotArray && data.comments.length > 0;
        hasMore = gotComments && (data?.has_more === true || data?.has_more === 1);

        // Use the API's cursor when present, otherwise step by one page.
        const nextCursor = data?.cursor || cursor + pageSize;
        if (hasMore && nextCursor === cursor) {
          // A cursor that does not move would re-fetch the same page until
          // maxPages is hit and append the same comments again.
          logger.warn(`[VideoPageAPI] Cursor did not advance for ${awemeId}, stopping pagination`);
          hasMore = false;
        }
        cursor = nextCursor;
      }
      logger.info(`[VideoPageAPI] Total fetched ${comments.length} comments for ${awemeId} from ${pageCount} pages`);
    } catch (e) {
      logger.warn(`[VideoPageAPI] Failed to fetch comments for ${awemeId}:`, e);
    }
    return comments;
  }
  /**
   * Fetches the comments of one work through the creator-centre comment
   * management API (fallback strategy).
   *
   * Pages through the `comment/list/select` endpoint on `creator.douyin.com`
   * using an in-page `fetch` with `credentials: 'include'`. At most 50 pages
   * of 50 comments are requested. Pagination stops when the API reports no
   * more data, returns an empty page, reports an error, or hands back a
   * cursor that did not advance.
   *
   * NOTE(review): the original author describes this endpoint as returning
   * the comments that need a reply, i.e. possibly a subset. That cannot be
   * confirmed from this code.
   *
   * @param page - Browser page in which the requests are executed. This
   *   method does not navigate it.
   * @param awemeId - ID of the work whose comments are requested.
   * @returns The comments collected so far. Failures are logged rather than
   *   thrown, so the result may be partial or empty.
   */
  private async fetchCreatorCommentsDirectApi(page: Page, awemeId: string): Promise<CommentItem[]> {
    const comments: CommentItem[] = [];
    const maxPages = 50;
    const pageSize = 50;
    let cursor = 0;
    let hasMore = true;
    let pageCount = 0;

    // A malformed create_time would make Date#toISOString throw a RangeError
    // and abort the whole pagination. Fall back to the epoch, which is also
    // what a missing timestamp maps to.
    const toIsoTime = (seconds: unknown): string => {
      const date = new Date(Number(seconds || 0) * 1000);
      return Number.isNaN(date.getTime()) ? new Date(0).toISOString() : date.toISOString();
    };

    try {
      while (hasMore && pageCount < maxPages) {
        pageCount++;
        const data = await page.evaluate(async ({ videoId, cursorValue, count }) => {
          try {
            const url = `https://creator.douyin.com/web/api/third_party/aweme/api/comment/read/aweme/v1/web/comment/list/select/?aweme_id=${videoId}&cursor=${cursorValue}&count=${count}&comment_select_options=0&sort_options=0&channel_id=618&app_id=2906&aid=2906&device_platform=webapp`;
            const resp = await fetch(url, {
              credentials: 'include',
              headers: {
                'Accept': 'application/json',
              },
            });
            // Same error reporting as the video-page fetcher: HTTP failures
            // and non-JSON bodies come back as data instead of an exception.
            if (!resp.ok) {
              return { error: `HTTP ${resp.status}`, comments: [] };
            }
            return resp.json();
          } catch (e) {
            return { error: String(e), comments: [] };
          }
        }, { videoId: awemeId, cursorValue: cursor, count: pageSize });

        if (data?.error) {
          logger.warn(`[CreatorAPI] API error for ${awemeId}: ${data.error}`);
        }

        const gotArray = Array.isArray(data?.comments);
        if (gotArray) {
          for (const c of data.comments) {
            comments.push({
              commentId: String(c.cid || ''),
              authorId: String(c.user?.uid || ''),
              authorName: String(c.user?.nickname || '匿名'),
              authorAvatar: c.user?.avatar_thumb?.url_list?.[0] || '',
              content: String(c.text || ''),
              likeCount: Number(c.digg_count || 0),
              commentTime: toIsoTime(c.create_time),
              videoId: String(c.aweme_id || awemeId),
            });
          }
          logger.info(`[CreatorAPI] Page ${pageCount}: got ${data.comments.length} comments for ${awemeId}`);
        }

        // An empty or missing page always ends the pagination.
        const gotComments = gotArray && data.comments.length > 0;
        hasMore = gotComments && (data?.has_more === true || data?.has_more === 1);

        // Use the API's cursor when present, otherwise step by one page.
        const nextCursor = data?.cursor || cursor + pageSize;
        if (hasMore && nextCursor === cursor) {
          // A cursor that does not move would re-fetch the same page until
          // maxPages is hit and append the same comments again.
          logger.warn(`[CreatorAPI] Cursor did not advance for ${awemeId}, stopping pagination`);
          hasMore = false;
        }
        cursor = nextCursor;
      }
      logger.info(`[CreatorAPI] Total fetched ${comments.length} comments for ${awemeId} from ${pageCount} pages`);
    } catch (e) {
      logger.warn(`[CreatorAPI] Failed to fetch comments for ${awemeId}:`, e);
    }
    return comments;
  }
  /**
   * Loads the account's work list by calling the creator-centre `work_list`
   * endpoint from inside the page, following the cursor page by page.
   *
   * If `page` is not already on a `/content/manage` URL it is first navigated
   * to the content-management page. When that navigation ends on a URL
   * containing `login` or `passport`, an empty result with `total: 0` is
   * returned immediately.
   *
   * At most 20 pages of 20 works are requested. Errors are logged, never
   * thrown; works gathered before a failure are still returned.
   *
   * @param page - Browser page used for navigation and in-page requests.
   * @returns `works` holds the parsed works in API order. `total` is the
   *   total work count taken from the first page: the first work's
   *   `author.aweme_count`, else the response's `total`, else another author
   *   count field. It is 0 when none of these is available.
   */
  private async fetchWorksDirectApi(page: Page): Promise<{
    works: Array<{
      awemeId: string;
      title: string;
      coverUrl: string;
      videoUrl?: string;
      playCount: number;
      likeCount: number;
      commentCount: number;
      shareCount: number;
      collectCount: number;
      createTime?: number;
    }>;
    total: number;
  }> {
    type WorkEntry = {
      awemeId: string;
      title: string;
      coverUrl: string;
      videoUrl?: string;
      playCount: number;
      likeCount: number;
      commentCount: number;
      shareCount: number;
      collectCount: number;
      createTime?: number;
    };

    const collected: WorkEntry[] = [];
    // Total number of works as reported by the API (0 = unknown).
    let reportedTotal = 0;

    // Statistics are normalised through a string so both forms parse alike.
    const toCount = (raw: unknown): number => parseInt(String(raw || '0'), 10);

    try {
      // The API is called from the content-management page so that the
      // request runs with that page's context.
      const manageUrl = 'https://creator.douyin.com/creator-micro/content/manage';
      if (!page.url().includes('/content/manage')) {
        logger.info(`[DirectAPI] Navigating to content manage page...`);
        await page.goto(manageUrl, {
          waitUntil: 'domcontentloaded',
          timeout: 30000,
        });
        await page.waitForTimeout(2000);

        // A redirect to a login/passport URL means the session is not valid.
        const landedUrl = page.url();
        if (landedUrl.includes('login') || landedUrl.includes('passport')) {
          logger.warn('[DirectAPI] Not logged in, cannot fetch works');
          return { works: collected, total: 0 };
        }
      }

      const pageLimit = 20; // hard cap so a misbehaving cursor cannot loop forever
      let keepPaging = true;
      let nextCursor = 0;
      let pagesFetched = 0;

      while (keepPaging && pagesFetched < pageLimit) {
        pagesFetched++;
        const isFirstPage = pagesFetched === 1;
        logger.info(`[DirectAPI] Fetching works page ${pagesFetched}, cursor: ${nextCursor}`);

        const payload = await page.evaluate(async (cursor: number) => {
          // work_list endpoint. Per the original author's note, status=0
          // selects all published works and count is the page size.
          const url = `https://creator.douyin.com/janus/douyin/creator/pc/work_list?status=0&scene=star_atlas&device_platform=android&count=20&max_cursor=${cursor}&cookie_enabled=true&browser_language=zh-CN&browser_platform=Win32&browser_name=Mozilla&browser_online=true&timezone_name=Asia%2FShanghai&aid=1128`;
          const resp = await fetch(url, {
            credentials: 'include',
            headers: {
              'Accept': 'application/json',
            },
          });
          return resp.json();
        }, nextCursor);

        const items = payload?.aweme_list || [];
        // Truncated dump of the raw response, used only for diagnostics.
        const dumpPayload = (limit: number): string => JSON.stringify(payload).substring(0, limit);

        if (isFirstPage) {
          logger.info(`[DirectAPI] First page API response: ${dumpPayload(1000)}`);
        }
        logger.info(`[DirectAPI] API response: status_code=${payload?.status_code}, has_more=${payload?.has_more}, max_cursor=${payload?.max_cursor}, aweme_list_length=${items.length}`);

        // Any defined, non-zero status_code is treated as an API error.
        const statusCode = payload?.status_code;
        if (statusCode !== 0 && statusCode !== undefined) {
          logger.warn(`[DirectAPI] API returned error status_code: ${statusCode}`);
          logger.warn(`[DirectAPI] Error message: ${payload?.err_msg || payload?.errMsg || 'unknown'}`);
          if (statusCode === 8) {
            // The original author notes that 8 means "not authorized".
            logger.warn('[DirectAPI] status_code 8: Not authorized, may need re-login');
          }
          if (isFirstPage) {
            logger.error(`[DirectAPI] First page failed with status_code ${statusCode}, cannot fetch works`);
            logger.error(`[DirectAPI] Response data: ${dumpPayload(500)}`);
          }
          break;
        }

        if (isFirstPage) {
          if (statusCode === 0 && items.length === 0) {
            logger.warn(`[DirectAPI] API returned success but aweme_list is empty on first page`);
            logger.warn(`[DirectAPI] Response data: ${dumpPayload(500)}`);
          }

          // The total is derived once, from the first page only.
          const author = items.length > 0 ? items[0]?.author : undefined;

          // Option 1: author.aweme_count of the first work.
          if (author?.aweme_count !== undefined && author.aweme_count > 0) {
            reportedTotal = author.aweme_count;
            logger.info(`[DirectAPI] Using author.aweme_count as total works: ${reportedTotal}`);
          }
          // Option 2: the response's own total field.
          if (reportedTotal === 0 && payload?.total !== undefined && payload.total > 0) {
            reportedTotal = payload.total;
            logger.info(`[DirectAPI] Using API response total field: ${reportedTotal}`);
          }
          // Option 3: any other count-like field on the author object.
          if (reportedTotal === 0 && author) {
            const candidateFields = ['aweme_count', 'work_count', 'video_count', 'item_count'];
            const matched = candidateFields.find(
              name => author[name] !== undefined && author[name] > 0,
            );
            if (matched !== undefined) {
              reportedTotal = author[matched];
              logger.info(`[DirectAPI] Using author.${matched} as total works: ${reportedTotal}`);
            }
          }
          // Still unknown: dump what is available to help debugging.
          if (reportedTotal === 0 && author) {
            logger.warn(`[DirectAPI] Could not find total works count. Author object: ${JSON.stringify(author).substring(0, 300)}`);
          }
          if (reportedTotal === 0 && items.length === 0) {
            logger.warn(`[DirectAPI] First page returned 0 works. Full response: ${dumpPayload(500)}`);
          }
        }

        // Convert each raw aweme entry into a work record.
        logger.info(`[DirectAPI] Page ${pagesFetched}: got ${items.length} works from aweme_list`);
        for (const raw of items) {
          const awemeId = String(raw.aweme_id || '');
          if (!awemeId) continue;

          const stats = raw.statistics || {};

          // Title: item_title if present, otherwise the first line of the
          // description, capped at 50 characters.
          const title = raw.item_title
            || ((raw.desc || raw.caption || '').split('\n')[0].slice(0, 50) || '无标题');

          // Cover: Cover.url_list first, then video.cover.url_list.
          const coverUrl = raw.Cover?.url_list?.length > 0
            ? raw.Cover.url_list[0]
            : raw.video?.cover?.url_list?.length > 0
              ? raw.video.cover.url_list[0]
              : '';

          collected.push({
            awemeId,
            title,
            coverUrl,
            // The first play_addr URL is used as the video URL.
            videoUrl: raw.video?.play_addr?.url_list?.[0] || '',
            playCount: toCount(stats.play_count),
            likeCount: toCount(stats.digg_count), // digg_count is the like counter
            commentCount: toCount(stats.comment_count),
            shareCount: toCount(stats.share_count),
            collectCount: toCount(stats.collect_count),
            createTime: raw.create_time,
          });
        }

        // Decide whether to continue. has_more alone is not trusted: the
        // cursor must also have moved.
        const apiSaysMore = payload?.has_more === true || payload?.has_more === 1;
        const apiCursor = payload?.max_cursor;
        if (apiCursor !== undefined && apiCursor !== null) {
          const moved = apiCursor !== nextCursor;
          if (moved) {
            nextCursor = apiCursor;
          }
          keepPaging = moved && apiSaysMore && items.length > 0;
        } else if (items.length > 0) {
          // No max_cursor in the response: fall back to the create_time of
          // the last work on this page.
          const fallbackCursor = items[items.length - 1].create_time;
          const moved = Boolean(fallbackCursor) && fallbackCursor !== nextCursor;
          if (moved) {
            nextCursor = fallbackCursor;
          }
          keepPaging = moved && apiSaysMore;
        } else {
          // Nothing came back at all: stop.
          keepPaging = false;
        }

        logger.info(`[DirectAPI] Page ${pagesFetched} result: got ${items.length} works, hasMore=${keepPaging}, nextCursor=${nextCursor}`);

        // Short pause between pages to avoid hammering the endpoint.
        if (keepPaging) {
          await new Promise(resolve => setTimeout(resolve, 500));
        }
      }

      logger.info(`[DirectAPI] Total fetched ${collected.length} works from ${pagesFetched} pages, items count: ${reportedTotal}`);

      // A positive total with nothing fetched points at a failed request.
      if (reportedTotal > 0 && collected.length === 0) {
        logger.warn(`[DirectAPI] Warning: API reported ${reportedTotal} works but fetched 0 works`);
        logger.warn(`[DirectAPI] This may indicate: API error, cookie expired, or permission issue`);
      }
    } catch (e) {
      logger.error('[DirectAPI] Failed to fetch works:', e);
      logger.error('[DirectAPI] Error details:', e instanceof Error ? e.stack : String(e));
    }

    return { works: collected, total: reportedTotal };
  }
  /**
   * Fetches Douyin comments work by work: the API-interception method is
   * tried first, and DOM parsing is used as a fallback when it returns no
   * results.
   */
  3865. async fetchDouyinCommentsViaApi(cookies: CookieData[]): Promise<WorkComments[]> {
  3866. const result = await this.fetchDouyinCommentsByApiInterception(cookies);
  3867. if (result.length > 0) {
  3868. return result;
  3869. }
  3870. logger.info('[Fallback] Using DOM parsing method...');
  3871. const browser = await launchBrowser({
  3872. headless: true, // 改为无头模式
  3873. args: ['--no-sandbox', '--disable-setuid-sandbox'],
  3874. });
  3875. const allWorkComments: WorkComments[] = [];
  3876. try {
  3877. const context = await browser.newContext({
  3878. viewport: { width: 1920, height: 1080 },
  3879. userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  3880. });
  3881. // 设置 Cookie
  3882. const playwrightCookies = this.normalizePlaywrightCookies(cookies.map(c => ({
  3883. name: c.name,
  3884. value: c.value,
  3885. domain: c.domain || '.douyin.com',
  3886. path: c.path || '/',
  3887. })));
  3888. await context.addCookies(playwrightCookies);
  3889. logger.info(`Set ${playwrightCookies.length} cookies`);
  3890. const page = await context.newPage();
  3891. // 导航到评论管理页面
  3892. logger.info('Navigating to Douyin comment management page...');
  3893. await page.goto('https://creator.douyin.com/creator-micro/interactive/comment', {
  3894. waitUntil: 'domcontentloaded',
  3895. timeout: 60000,
  3896. });
  3897. // 等待页面完全加载
  3898. logger.info('Waiting for page to fully load...');
  3899. await page.waitForTimeout(5000);
  3900. // 检查是否需要登录
  3901. const currentUrl = page.url();
  3902. logger.info(`Current URL: ${currentUrl}`);
  3903. if (currentUrl.includes('login') || currentUrl.includes('passport')) {
  3904. logger.warn('Cookie expired, need re-login');
  3905. await browser.close();
  3906. return allWorkComments;
  3907. }
  3908. logger.info('Page loaded successfully');
  3909. // 等待评论列表加载
  3910. logger.info('Waiting for comments to load...');
  3911. try {
  3912. await page.waitForSelector('[class*="comment-content-text-"]', { timeout: 10000 });
  3913. logger.info('Comments loaded');
  3914. } catch {
  3915. logger.warn('No comments found on initial load, will try to select works');
  3916. }
  3917. // 辅助函数:从当前页面提取评论
  3918. const extractCommentsFromCurrentPage = async (): Promise<CommentItem[]> => {
  3919. logger.info('Extracting comments from current page...');
  3920. // 滚动页面加载所有评论
  3921. await page.evaluate(async () => {
  3922. // 滚动多次加载更多评论
  3923. for (let i = 0; i < 10; i++) {
  3924. window.scrollBy(0, 500);
  3925. await new Promise(r => setTimeout(r, 800));
  3926. }
  3927. window.scrollTo(0, 0);
  3928. });
  3929. await page.waitForTimeout(2000);
  3930. // 使用精确选择器提取评论
  3931. const comments = await page.evaluate(() => {
  3932. const result: Array<{
  3933. commentId: string;
  3934. authorId: string;
  3935. authorName: string;
  3936. authorAvatar: string;
  3937. content: string;
  3938. likeCount: number;
  3939. commentTime: string;
  3940. }> = [];
  3941. const seenContents = new Set<string>();
  3942. // 查找所有评论容器:包含 username 和 comment-content-text 的 container
  3943. const allContainers = document.querySelectorAll('[class*="container-"]');
  3944. const commentContainers: Element[] = [];
  3945. allContainers.forEach(container => {
  3946. const hasUsername = container.querySelector('[class*="username-"]');
  3947. const hasCommentContent = container.querySelector('[class*="comment-content-text-"]');
  3948. if (hasUsername && hasCommentContent) {
  3949. commentContainers.push(container);
  3950. }
  3951. });
  3952. console.log(`Found ${commentContainers.length} comment containers`);
  3953. commentContainers.forEach((container, index) => {
  3954. try {
  3955. // 用户名
  3956. const usernameEl = container.querySelector('[class*="username-"]');
  3957. const authorName = usernameEl?.textContent?.trim() || '未知用户';
  3958. // 时间
  3959. const timeEl = container.querySelector('[class*="time-"]');
  3960. const commentTime = timeEl?.textContent?.trim() || '';
  3961. // 评论内容
  3962. const contentEl = container.querySelector('[class*="comment-content-text-"]');
  3963. const content = contentEl?.textContent?.trim() || '';
  3964. if (!content) return;
  3965. // 头像
  3966. const avatarContainer = container.querySelector('[class*="avatar-"]');
  3967. const avatarImg = avatarContainer?.querySelector('img');
  3968. const authorAvatar = avatarImg?.src || '';
  3969. // 去重
  3970. const key = `${authorName}||${content}`;
  3971. if (seenContents.has(key)) return;
  3972. seenContents.add(key);
  3973. // 点赞数
  3974. let likeCount = 0;
  3975. const opsEl = container.querySelector('[class*="operations-"]');
  3976. if (opsEl) {
  3977. const itemEl = opsEl.querySelector('[class*="item-"]');
  3978. if (itemEl) {
  3979. const match = itemEl.textContent?.match(/(\d+)/);
  3980. if (match) likeCount = parseInt(match[1], 10);
  3981. }
  3982. }
  3983. // 生成 ID
  3984. const hash = content.slice(0, 30) + authorName + commentTime;
  3985. const commentId = `dy_${btoa(encodeURIComponent(hash)).slice(0, 20)}`;
  3986. result.push({
  3987. commentId,
  3988. authorId: authorName,
  3989. authorName,
  3990. authorAvatar,
  3991. content,
  3992. likeCount,
  3993. commentTime,
  3994. });
  3995. console.log(`[${index}] ${authorName}: ${content.slice(0, 30)}`);
  3996. } catch (e) {
  3997. console.error(`Error at ${index}:`, e);
  3998. }
  3999. });
  4000. return result;
  4001. });
  4002. logger.info(`Extracted ${comments.length} comments`);
  4003. return comments;
  4004. };
  4005. // 辅助函数:获取当前显示的作品标题
  4006. const getCurrentWorkTitle = async (): Promise<string> => {
  4007. return page.evaluate(() => {
  4008. // 查找作品标题 - 通常在页面顶部区域
  4009. // 排除筛选器和按钮中的文本
  4010. const excludeTexts = ['全部评论', '最新发布', '全部人群', '搜索', '选择作品', '评论管理'];
  4011. // 方法1: 查找视频信息区域
  4012. const videoInfoSelectors = [
  4013. '[class*="video-info"] [class*="title"]',
  4014. '[class*="work-info"] [class*="title"]',
  4015. '[class*="content-info"] [class*="title"]',
  4016. ];
  4017. for (const selector of videoInfoSelectors) {
  4018. const el = document.querySelector(selector);
  4019. if (el?.textContent) {
  4020. const text = el.textContent.trim();
  4021. if (text.length > 5 && !excludeTexts.some(e => text.includes(e))) {
  4022. return text;
  4023. }
  4024. }
  4025. }
  4026. // 方法2: 查找页面上较长的标题文本
  4027. const allTexts = document.querySelectorAll('div, span, p');
  4028. for (const el of Array.from(allTexts)) {
  4029. const text = el.textContent?.trim() || '';
  4030. if (text.length > 20 &&
  4031. text.length < 200 &&
  4032. !excludeTexts.some(e => text.includes(e)) &&
  4033. !el.closest('button') &&
  4034. !el.closest('[class*="select"]') &&
  4035. !el.closest('[class*="filter"]')) {
  4036. // 检查是否可能是作品标题(通常包含特定字符或格式)
  4037. if (text.includes('#') || text.match(/[,。!?、]/)) {
  4038. return text;
  4039. }
  4040. }
  4041. }
  4042. return '';
  4043. });
  4044. };
  4045. // 步骤1: 先获取当前页面显示的评论(默认显示的第一个作品)
  4046. logger.info('Step 1: Getting comments from default view...');
  4047. const defaultTitle = await getCurrentWorkTitle();
  4048. const defaultComments = await extractCommentsFromCurrentPage();
  4049. if (defaultComments.length > 0) {
  4050. allWorkComments.push({
  4051. videoId: `video_${Date.now()}`,
  4052. videoTitle: defaultTitle || '默认作品',
  4053. videoCoverUrl: '',
  4054. comments: defaultComments,
  4055. });
  4056. logger.info(`Got ${defaultComments.length} comments from default view, title: "${defaultTitle.slice(0, 50)}"`);
  4057. }
  4058. // 步骤2: 尝试点击"选择作品"按钮获取更多作品的评论
  4059. logger.info('Step 2: Looking for "选择作品" button...');
  4060. // 使用 locator 查找按钮
  4061. const selectBtn = page.locator('text=选择作品').first();
  4062. const btnCount = await selectBtn.count();
  4063. logger.info(`Found ${btnCount} "选择作品" button(s)`);
  4064. if (btnCount > 0) {
  4065. logger.info('Clicking "选择作品" button...');
  4066. await selectBtn.click();
  4067. // 等待更长时间,确保弹窗完全加载
  4068. logger.info('Waiting for work list modal to appear...');
  4069. await page.waitForTimeout(5000);
  4070. // 打印当前页面状态,帮助调试
  4071. const modalInfo = await page.evaluate(() => {
  4072. // 查找所有可能的弹窗元素
  4073. const modals = document.querySelectorAll('[class*="modal"], [class*="popup"], [class*="drawer"], [class*="dialog"], [role="dialog"]');
  4074. const modalClasses = Array.from(modals).map(m => m.className).slice(0, 5);
  4075. // 查找所有图片(作品封面)
  4076. const images = document.querySelectorAll('img[src*="douyinpic"]');
  4077. // 查找所有可能的卡片元素
  4078. const cards = document.querySelectorAll('[class*="card"], [class*="item"]');
  4079. const cardClasses = Array.from(cards).map(c => c.className).slice(0, 10);
  4080. return {
  4081. modalCount: modals.length,
  4082. modalClasses,
  4083. imageCount: images.length,
  4084. cardCount: cards.length,
  4085. cardClasses,
  4086. };
  4087. });
  4088. logger.info(`Modal debug: ${JSON.stringify(modalInfo)}`);
  4089. // 尝试多种选择器查找作品列表
  4090. const workSelectors = [
  4091. '[class*="video-card"]',
  4092. '[class*="work-item"]',
  4093. '[class*="content-item"]',
  4094. '[class*="modal"] [class*="card"]',
  4095. '[class*="modal"] img',
  4096. '[class*="drawer"] [class*="card"]',
  4097. '[class*="drawer"] img',
  4098. '[role="dialog"] [class*="card"]',
  4099. '[role="dialog"] img',
  4100. '[class*="popup"] img',
  4101. 'img[src*="douyinpic"]', // 直接找抖音图片
  4102. ];
  4103. let workElements: Awaited<ReturnType<typeof page.$$>> = [];
  4104. let usedSelector = '';
  4105. for (const selector of workSelectors) {
  4106. const elements = await page.$$(selector);
  4107. logger.info(`Selector "${selector}" found ${elements.length} elements`);
  4108. if (elements.length > 0 && elements.length < 50) { // 避免选中太多无关元素
  4109. workElements = elements;
  4110. usedSelector = selector;
  4111. break;
  4112. }
  4113. }
  4114. logger.info(`Using selector "${usedSelector}", found ${workElements.length} work items`);
  4115. if (workElements.length > 0) {
  4116. // 首先获取所有作品的评论数信息
  4117. // 根据 HTML 结构:
  4118. // - 作品项容器: div.container-Lkxos9 (类名可能变化,使用 [class*="container-"])
  4119. // - 标题: div.title-LUOP3b (类名可能变化,使用 [class*="title-"])
  4120. // - 评论数: div.right-os7ZB9 > div (类名可能变化,使用 [class*="right-"] > div)
  4121. const workInfoList = await page.evaluate(() => {
  4122. const works: Array<{ index: number; title: string; commentCount: number }> = [];
  4123. // 查找作品列表容器中的所有作品项
  4124. // 根据用户提供的 HTML,作品项的类名是 container-Lkxos9
  4125. const workContainers = document.querySelectorAll('[role="dialog"] [class*="container-"]');
  4126. console.log(`Found ${workContainers.length} work containers`);
  4127. workContainers.forEach((container, index) => {
  4128. // 检查是否包含图片(确认是作品项而不是其他容器)
  4129. const img = container.querySelector('img[src*="douyinpic"]');
  4130. if (!img) {
  4131. console.log(`Container ${index} has no douyinpic image, skipping`);
  4132. return;
  4133. }
  4134. // 提取标题
  4135. const titleEl = container.querySelector('[class*="title-"]');
  4136. const title = titleEl?.textContent?.trim() || `作品 ${works.length + 1}`;
  4137. // 提取评论数 - 在 right- 容器的最后一个 div 中
  4138. let commentCount = 0;
  4139. const rightContainer = container.querySelector('[class*="right-"]');
  4140. if (rightContainer) {
  4141. // 获取 right 容器下的所有直接 div 子元素
  4142. const divs = rightContainer.querySelectorAll(':scope > div');
  4143. if (divs.length > 0) {
  4144. // 最后一个 div 包含评论数
  4145. const lastDiv = divs[divs.length - 1];
  4146. const text = lastDiv.textContent?.trim() || '0';
  4147. const num = parseInt(text, 10);
  4148. if (!isNaN(num)) {
  4149. commentCount = num;
  4150. }
  4151. }
  4152. }
  4153. console.log(`Work ${works.length}: title="${title.slice(0, 30)}...", commentCount=${commentCount}`);
  4154. works.push({
  4155. index: works.length,
  4156. title: title.slice(0, 100),
  4157. commentCount
  4158. });
  4159. });
  4160. // 如果上面的选择器没找到,尝试备用方法
  4161. if (works.length === 0) {
  4162. console.log('Primary selector failed, trying fallback...');
  4163. // 直接查找包含 douyinpic 图片的元素的父容器
  4164. const images = document.querySelectorAll('[role="dialog"] img[src*="douyinpic"]');
  4165. images.forEach((img, index) => {
  4166. // 向上查找到作品项容器
  4167. let container = img.parentElement;
  4168. while (container && !container.classList.toString().includes('container-')) {
  4169. container = container.parentElement;
  4170. }
  4171. if (container) {
  4172. const titleEl = container.querySelector('[class*="title-"]');
  4173. const title = titleEl?.textContent?.trim() || `作品 ${index + 1}`;
  4174. // 查找评论数
  4175. let commentCount = 0;
  4176. const rightEl = container.querySelector('[class*="right-"]');
  4177. if (rightEl) {
  4178. const text = rightEl.textContent?.trim() || '';
  4179. // 提取最后出现的数字
  4180. const matches = text.match(/\d+/g);
  4181. if (matches && matches.length > 0) {
  4182. commentCount = parseInt(matches[matches.length - 1], 10);
  4183. }
  4184. }
  4185. works.push({ index, title: title.slice(0, 100), commentCount });
  4186. }
  4187. });
  4188. }
  4189. return works;
  4190. });
  4191. logger.info(`Work info list (${workInfoList.length} items): ${JSON.stringify(workInfoList)}`);
  4192. // 过滤出评论数 > 0 的作品,或者评论数未知(-1)的作品
  4193. const worksWithComments = workInfoList.filter(w => w.commentCount > 0 || w.commentCount === -1);
  4194. logger.info(`Found ${worksWithComments.length} works with comments > 0 or unknown (out of ${workInfoList.length})`);
  4195. // 如果所有作品评论数都是0,则不处理任何作品
  4196. const allZero = workInfoList.every(w => w.commentCount === 0);
  4197. if (allZero) {
  4198. logger.info('All works have 0 comments, skipping all');
  4199. }
  4200. // 如果没有找到评论数信息或有未知的,处理这些作品
  4201. const indicesToProcess = allZero
  4202. ? []
  4203. : (worksWithComments.length > 0
  4204. ? worksWithComments.map(w => w.index)
  4205. : Array.from({ length: Math.min(workElements.length, 10) }, (_, i) => i));
  4206. logger.info(`Will process work indices: ${indicesToProcess.join(', ')}`);
  4207. // 遍历每个有评论的作品
  4208. for (let idx = 0; idx < indicesToProcess.length; idx++) {
  4209. const i = indicesToProcess[idx];
  4210. try {
  4211. const workInfo = workInfoList.find(w => w.index === i);
  4212. logger.info(`Processing work ${idx + 1}/${indicesToProcess.length} (index=${i}, title="${workInfo?.title}", expectedComments=${workInfo?.commentCount})...`);
  4213. // 重新打开选择作品弹窗
  4214. if (idx > 0) {
  4215. await selectBtn.click();
  4216. await page.waitForTimeout(3000);
  4217. }
  4218. // 重新获取元素列表(因为 DOM 可能已变化)
  4219. const currentItems = await page.$$(usedSelector);
  4220. if (i < currentItems.length) {
  4221. // 滚动到元素可见
  4222. await currentItems[i].scrollIntoViewIfNeeded();
  4223. await page.waitForTimeout(500);
  4224. // 点击元素
  4225. await currentItems[i].click();
  4226. await page.waitForTimeout(4000);
  4227. // 获取评论
  4228. const title = await getCurrentWorkTitle();
  4229. const comments = await extractCommentsFromCurrentPage();
  4230. logger.info(`Work index=${i}: title="${title.slice(0, 50)}", comments=${comments.length}`);
  4231. // 检查是否已经获取过这个作品的评论
  4232. const exists = allWorkComments.some(w =>
  4233. w.videoTitle === title ||
  4234. (w.comments.length > 0 && comments.length > 0 &&
  4235. w.comments[0].content === comments[0].content)
  4236. );
  4237. if (!exists && (comments.length > 0 || title)) {
  4238. allWorkComments.push({
  4239. videoId: `video_${Date.now()}_${i}`,
  4240. videoTitle: title || workInfo?.title || `作品 ${i + 1}`,
  4241. videoCoverUrl: '',
  4242. comments,
  4243. });
  4244. logger.info(`Work index=${i}: Saved ${comments.length} comments`);
  4245. } else {
  4246. logger.info(`Work index=${i}: Skipped (duplicate or empty)`);
  4247. }
  4248. }
  4249. } catch (err) {
  4250. logger.warn(`Error processing work index=${i}:`, err);
  4251. }
  4252. }
  4253. } else {
  4254. logger.warn('No work items found in modal');
  4255. }
  4256. // 按 Escape 关闭弹窗
  4257. try {
  4258. await page.keyboard.press('Escape');
  4259. await page.waitForTimeout(500);
  4260. } catch { }
  4261. } else {
  4262. logger.warn('"选择作品" button not found, only default comments will be returned');
  4263. }
  4264. await page.close();
  4265. await context.close();
  4266. await browser.close();
  4267. const totalComments = allWorkComments.reduce((sum, w) => sum + w.comments.length, 0);
  4268. logger.info(`Total: fetched ${totalComments} comments from ${allWorkComments.length} works`);
  4269. return allWorkComments;
  4270. } catch (error) {
  4271. logger.error('Error fetching Douyin comments:', error);
  4272. try {
  4273. await browser.close();
  4274. } catch { }
  4275. return allWorkComments;
  4276. }
  4277. }
  /**
   * Fetches Xiaohongshu comments for all notes in a single pass.
   *
   * Opens the creator comment-management page in a headless browser using the
   * supplied cookies and records comments from the JSON responses the page
   * triggers. If nothing was captured from the network, falls back to reading
   * comment elements from the rendered DOM.
   *
   * @param cookies Login cookies; entries without a domain default to `.xiaohongshu.com`.
   * @returns One entry per note that had captured comments. Empty when the
   *   session is redirected to a login page; on error, whatever was collected
   *   before the failure.
   */
  async fetchXiaohongshuCommentsViaApi(cookies: CookieData[]): Promise<WorkComments[]> {
    const browser = await launchBrowser({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });
    const allWorkComments: WorkComments[] = [];
    // The random suffix keeps fallback IDs distinct when several ID-less
    // comments are mapped within the same millisecond.
    const fallbackId = (prefix: string): string =>
      `${prefix}_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;

    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      });

      // Install the session cookies.
      const playwrightCookies = this.normalizePlaywrightCookies(cookies.map(c => ({
        name: c.name,
        value: c.value,
        domain: c.domain || '.xiaohongshu.com',
        path: c.path || '/',
      })));
      await context.addCookies(playwrightCookies);
      logger.info(`[Xiaohongshu Comments] Set ${playwrightCookies.length} cookies`);

      const page = await context.newPage();

      // Data captured from intercepted API responses.
      const capturedComments: Map<string, CommentItem[]> = new Map();
      const capturedNotes: Array<{
        noteId: string;
        title: string;
        coverUrl: string;
      }> = [];

      // Response listener: inspects every response and keeps note/comment payloads.
      const xhsCommentResponseHandler = async (response: any) => {
        const url = response.url();
        try {
          // Note list API: remember title and cover for each note.
          if (url.includes('/api/galaxy/creator/content/note_list') ||
            url.includes('/api/galaxy/creator/notes')) {
            const data = await response.json();
            logger.info(`[Xiaohongshu API] Notes list: ${safeStringify(data)}`);
            const notes = data?.data?.notes || data?.data?.list || [];
            for (const note of notes) {
              capturedNotes.push({
                noteId: note.note_id || note.id || '',
                title: note.title || note.desc || '',
                coverUrl: note.cover?.url || note.cover || '',
              });
            }
          }

          // Comment list API: flatten top-level comments and their replies.
          if (url.includes('/api/sns/web/v2/comment/page') ||
            url.includes('/api/galaxy/creator/comment') ||
            url.includes('/api/sns/v1/note/comment')) {
            const data = await response.json();
            logger.info(`[Xiaohongshu API] Comments: ${safeStringify(data)}`);
            const comments: CommentItem[] = [];
            const commentList = data?.data?.comments || data?.comments || [];
            for (const comment of commentList) {
              // ID reported by the API, if any; replies link to it as their parent.
              const reportedId = comment.id || comment.comment_id;
              comments.push({
                commentId: reportedId || fallbackId('xhs'),
                authorId: comment.user_info?.user_id || comment.user_id || '',
                authorName: comment.user_info?.nickname || comment.nickname || '',
                authorAvatar: comment.user_info?.image || comment.avatar || '',
                content: comment.content || '',
                likeCount: comment.like_count || 0,
                commentTime: comment.create_time || comment.time || '',
                parentCommentId: comment.target_comment_id || undefined,
              });

              // Replies nested under this comment.
              const subComments = comment.sub_comments || comment.replies || [];
              for (const sub of subComments) {
                comments.push({
                  commentId: sub.id || sub.comment_id || fallbackId('xhs_sub'),
                  authorId: sub.user_info?.user_id || sub.user_id || '',
                  authorName: sub.user_info?.nickname || sub.nickname || '',
                  authorAvatar: sub.user_info?.image || sub.avatar || '',
                  content: sub.content || '',
                  likeCount: sub.like_count || 0,
                  commentTime: sub.create_time || sub.time || '',
                  parentCommentId: reportedId || undefined,
                });
              }
            }

            // Try to read the note ID from the request URL; otherwise use a synthetic key.
            const noteIdMatch = url.match(/note_id=([^&]+)/) || url.match(/noteId=([^&]+)/);
            const noteId = noteIdMatch?.[1] || `note_${Date.now()}`;
            if (comments.length > 0) {
              const existing = capturedComments.get(noteId) || [];
              capturedComments.set(noteId, [...existing, ...comments]);
            }
          }
        } catch {
          // Best effort: failures while reading or mapping a single response are deliberately ignored.
        }
      };
      page.on('response', xhsCommentResponseHandler);

      // Navigate to the comment-management page.
      logger.info('[Xiaohongshu Comments] Navigating to comment management...');
      await page.goto('https://creator.xiaohongshu.com/creator/comment', {
        waitUntil: 'domcontentloaded',
        timeout: 60000,
      });
      await page.waitForTimeout(5000);

      // A redirect to a login/passport URL is treated as an expired session.
      const currentUrl = page.url();
      if (currentUrl.includes('login') || currentUrl.includes('passport')) {
        logger.warn('[Xiaohongshu Comments] Cookie expired, need re-login');
        return allWorkComments;
      }

      // Scroll down in steps to try to trigger loading of more comments.
      for (let i = 0; i < 5; i++) {
        await page.evaluate(() => {
          window.scrollBy(0, 500);
        });
        await page.waitForTimeout(1000);
      }

      // Give in-flight API responses time to arrive.
      await page.waitForTimeout(3000);

      // Convert the captured comments into WorkComments entries.
      for (const [noteId, comments] of capturedComments) {
        const noteInfo = capturedNotes.find(n => n.noteId === noteId);
        allWorkComments.push({
          videoId: noteId,
          videoTitle: noteInfo?.title || `笔记 ${noteId.slice(0, 10)}`,
          videoCoverUrl: noteInfo?.coverUrl || '',
          comments,
        });
      }

      // Nothing captured from the API: fall back to scraping the rendered page.
      if (allWorkComments.length === 0) {
        logger.info('[Xiaohongshu Comments] No comments from API, extracting from page...');
        const pageComments = await page.evaluate(() => {
          const result: Array<{
            commentId: string;
            authorName: string;
            authorAvatar: string;
            content: string;
            likeCount: number;
            commentTime: string;
          }> = [];
          const commentItems = document.querySelectorAll('[class*="comment-item"], [class*="comment-card"]');
          commentItems.forEach((item, index) => {
            try {
              const authorEl = item.querySelector('[class*="author"], [class*="name"]');
              const avatarEl = item.querySelector('img');
              const contentEl = item.querySelector('[class*="content"]');
              const timeEl = item.querySelector('[class*="time"]');
              const likeEl = item.querySelector('[class*="like"] span');
              result.push({
                commentId: `xhs_page_${index}`,
                authorName: authorEl?.textContent?.trim() || '',
                authorAvatar: avatarEl?.src || '',
                content: contentEl?.textContent?.trim() || '',
                likeCount: parseInt(likeEl?.textContent || '0', 10) || 0,
                commentTime: timeEl?.textContent?.trim() || '',
              });
            } catch { }
          });
          return result;
        });
        if (pageComments.length > 0) {
          allWorkComments.push({
            videoId: 'page_comments',
            videoTitle: '页面评论',
            videoCoverUrl: '',
            comments: pageComments.map(c => ({
              ...c,
              authorId: '',
            })),
          });
        }
      }

      page.off('response', xhsCommentResponseHandler);
      await page.close();
      await context.close();

      const totalComments = allWorkComments.reduce((sum, w) => sum + w.comments.length, 0);
      logger.info(`[Xiaohongshu Comments] Total: fetched ${totalComments} comments from ${allWorkComments.length} works`);
      return allWorkComments;
    } catch (error) {
      logger.error('[Xiaohongshu Comments] Error:', error);
      // Return whatever was collected before the failure.
      return allWorkComments;
    } finally {
      // Single cleanup point for every exit path; close errors are ignored.
      await browser.close().catch(() => undefined);
    }
  }
  /**
   * Public entry point for fetching Baijiahao comments.
   *
   * Delegates to the browser-based scraper `fetchBaijiahaoCommentsByBrowser`.
   *
   * @param cookies Login cookies, passed through unchanged.
   * @returns Whatever the browser-based scraper resolves with.
   */
  async fetchBaijiahaoCommentsViaApi(cookies: CookieData[]): Promise<WorkComments[]> {
    const worksWithComments = await this.fetchBaijiahaoCommentsByBrowser(cookies);
    return worksWithComments;
  }
  /**
   * Scrapes Baijiahao comments from the comment-management page with a headless browser.
   *
   * Loads the "all comments" page using the supplied cookies, scrolls the list
   * and clicks the load-more control a few times, then reads each rendered
   * comment row from the DOM and groups the comments by work title.
   *
   * @param cookies Login cookies; entries without a domain default to `.baidu.com`.
   * @returns Comments grouped by work title (`未分类作品` when a row has no
   *   title). Empty when the session is redirected to a login page or when any
   *   step throws.
   */
  private async fetchBaijiahaoCommentsByBrowser(cookies: CookieData[]): Promise<WorkComments[]> {
    const browser = await launchBrowser({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });

    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        userAgent:
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      });
      await context.addCookies(this.normalizePlaywrightCookies(
        cookies.map((cookie) => ({
          name: cookie.name,
          value: cookie.value,
          domain: cookie.domain || '.baidu.com',
          path: cookie.path || '/',
        }))
      ));

      const page = await context.newPage();
      await page.goto('https://baijiahao.baidu.com/builder/rc/commentmanage/comment/all', {
        waitUntil: 'domcontentloaded',
        timeout: 30_000,
      });
      await page.waitForTimeout(3_000);

      // A redirect to a login/passport URL is treated as an expired session.
      const currentUrl = page.url();
      if (currentUrl.includes('login') || currentUrl.includes('passport.baidu.com')) {
        logger.warn('[Baijiahao Comments] Cookie expired, need re-login');
        return [];
      }

      // Defined once and passed into the page so the selector cannot drift between steps.
      const containerSelector =
        '.client_pages_newComment_comment_all_list .cheetah-ui-pro-scroll-view .list-container';
      await page.waitForSelector(containerSelector, { timeout: 15_000 });

      // Scroll the list to its end and click the load-more control, five rounds.
      for (let i = 0; i < 5; i += 1) {
        await page.evaluate((listSelector: string) => {
          const container = document.querySelector<HTMLElement>(listSelector);
          if (container) {
            container.scrollTop = container.scrollHeight;
          } else {
            window.scrollBy(0, 600);
          }
          const more = document.querySelector<HTMLElement>(
            '.client_pages_newComment_components_loadMore, .client_pages_newComment_comment_all_list .client_pages_newComment_components_loadMore'
          );
          more?.click();
        }, containerSelector);
        await page.waitForTimeout(1_200);
      }

      // Read every rendered comment row from the list.
      const rawComments = await page.evaluate((listSelector: string) => {
        const result: Array<{
          workTitle: string;
          content: string;
          authorName: string;
          authorAvatar: string;
          commentTime: string;
        }> = [];
        const listRoot = document.querySelector(listSelector);
        if (!listRoot) {
          return result;
        }
        const items = listRoot.querySelectorAll('.client_pages_newComment_comment_all_listItem');
        items.forEach((item) => {
          const workTitle =
            item.querySelector('.title-wrapper .title-content')?.textContent?.trim() || '';
          const content =
            item
              .querySelector('.content-wrapper .content .content-w-highlight, .content-wrapper .content')
              ?.textContent?.trim() || '';
          // Rows with neither a title nor content carry nothing usable.
          if (!workTitle && !content) {
            return;
          }
          result.push({
            workTitle,
            content,
            authorName:
              item.querySelector('.content-wrapper .user-container .name')?.textContent?.trim() || '',
            authorAvatar:
              item.querySelector<HTMLImageElement>('.comment-card-avatar-wrapper-card-avatar img')
                ?.src || '',
            commentTime:
              item.querySelector('.content-wrapper .info-wrapper span:nth-child(1)')?.textContent?.trim() ||
              '',
          });
        });
        return result;
      }, containerSelector);

      // Group comments by work title; the page scrape yields no work ID, so videoId stays empty.
      const grouped = new Map<string, WorkComments>();
      for (const item of rawComments) {
        const key = item.workTitle || '未分类作品';
        let work = grouped.get(key);
        if (!work) {
          work = {
            videoId: '',
            videoTitle: key,
            videoCoverUrl: '',
            comments: [],
          };
          grouped.set(key, work);
        }
        work.comments.push({
          commentId: `bjh_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
          authorId: '',
          authorName: item.authorName || '未知用户',
          authorAvatar: item.authorAvatar,
          content: item.content,
          likeCount: 0,
          commentTime: item.commentTime,
        });
      }
      return Array.from(grouped.values()).filter((item) => item.comments.length > 0);
    } catch (error) {
      logger.error('[Baijiahao Comments] Browser fallback failed:', error);
      return [];
    } finally {
      // Always release the browser; close errors are ignored.
      await browser.close().catch(() => undefined);
    }
  }
  /**
   * Fetches WeChat Channels (Weixin video) comments.
   *
   * Opens the Channels comment-management page in a headless browser using the
   * supplied cookies and records comments from the JSON responses the page
   * triggers. If nothing was captured from the network, falls back to reading
   * comment elements from the rendered DOM.
   *
   * @param cookies Login cookies; entries without a domain default to `.weixin.qq.com`.
   * @returns One entry per work that had captured comments. Empty when the
   *   session is redirected to a login page; on error, whatever was collected
   *   before the failure.
   */
  async fetchWeixinVideoCommentsViaApi(cookies: CookieData[]): Promise<WorkComments[]> {
    const browser = await launchBrowser({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });
    const allWorkComments: WorkComments[] = [];
    // The random suffix keeps fallback IDs distinct when several ID-less
    // comments are mapped within the same millisecond.
    const fallbackId = (prefix: string): string =>
      `${prefix}_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;

    try {
      const context = await browser.newContext({
        viewport: { width: 1920, height: 1080 },
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      });

      // Install the session cookies.
      const playwrightCookies = this.normalizePlaywrightCookies(cookies.map(c => ({
        name: c.name,
        value: c.value,
        domain: c.domain || '.weixin.qq.com',
        path: c.path || '/',
      })));
      await context.addCookies(playwrightCookies);
      logger.info(`[Weixin Video Comments] Set ${playwrightCookies.length} cookies`);

      const page = await context.newPage();

      // Data captured from intercepted API responses.
      const capturedComments: Map<string, CommentItem[]> = new Map();
      const capturedWorks: Array<{
        workId: string;
        title: string;
        coverUrl: string;
      }> = [];

      // Response listener: inspects every response and keeps work/comment payloads.
      const weixinCommentResponseHandler = async (response: any) => {
        const url = response.url();
        try {
          // Work list API: remember title and cover for each work.
          if (url.includes('/mmfinderassistant-bin/post/post_list')) {
            const data = await response.json();
            logger.info(`[Weixin Video API] Works list: ${safeStringify(data)}`);
            const posts = data?.data?.list || [];
            for (const post of posts) {
              capturedWorks.push({
                workId: post.objectNonce || post.id || '',
                title: post.title || post.desc || '',
                coverUrl: post.cover?.url || post.cover || '',
              });
            }
          }

          // Comment list API: flatten top-level comments and their replies.
          if (url.includes('/mmfinderassistant-bin/comment/comment_list')) {
            const data = await response.json();
            logger.info(`[Weixin Video API] Comments: ${safeStringify(data)}`);
            const comments: CommentItem[] = [];
            const commentList = data?.data?.commentList || data?.comments || [];
            for (const comment of commentList) {
              // ID reported by the API, if any; replies link to it as their parent.
              const reportedId = comment.commentId || comment.id;
              comments.push({
                commentId: reportedId || fallbackId('weixin'),
                authorId: comment.commenterInfo?.identifier || comment.authorId || '',
                authorName: comment.commenterInfo?.nickName || comment.nickname || comment.nick_name || '',
                authorAvatar: comment.commenterInfo?.headUrl || comment.avatar || '',
                content: comment.content || '',
                likeCount: comment.likeCnt || comment.like_count || 0,
                commentTime: comment.createTime || comment.create_time || '',
                parentCommentId: comment.parentCommentId || undefined,
              });

              // Replies nested under this comment.
              const subComments = comment.subCommentList || comment.sub_comments || [];
              for (const sub of subComments) {
                comments.push({
                  commentId: sub.commentId || sub.id || fallbackId('weixin_sub'),
                  authorId: sub.commenterInfo?.identifier || sub.authorId || '',
                  authorName: sub.commenterInfo?.nickName || sub.nickname || sub.nick_name || '',
                  authorAvatar: sub.commenterInfo?.headUrl || sub.avatar || '',
                  content: sub.content || '',
                  likeCount: sub.likeCnt || sub.like_count || 0,
                  commentTime: sub.createTime || sub.create_time || '',
                  parentCommentId: reportedId || undefined,
                });
              }
            }

            // Try to read the work ID from the request URL; otherwise use a synthetic key.
            const workIdMatch = url.match(/objectNonce=([^&]+)/) || url.match(/workId=([^&]+)/);
            const workId = workIdMatch?.[1] || `work_${Date.now()}`;
            if (comments.length > 0) {
              const existing = capturedComments.get(workId) || [];
              capturedComments.set(workId, [...existing, ...comments]);
            }
          }
        } catch {
          // Best effort: failures while reading or mapping a single response are deliberately ignored.
        }
      };
      page.on('response', weixinCommentResponseHandler);

      // Navigate to the comment-management page.
      logger.info('[Weixin Video Comments] Navigating to comment management...');
      await page.goto('https://channels.weixin.qq.com/platform/interaction/comment', {
        waitUntil: 'domcontentloaded',
        timeout: 60000,
      });
      await page.waitForTimeout(5000);

      // A redirect to a login/passport URL is treated as an expired session.
      const currentUrl = page.url();
      if (currentUrl.includes('login') || currentUrl.includes('passport')) {
        logger.warn('[Weixin Video Comments] Cookie expired, need re-login');
        return allWorkComments;
      }

      // Scroll down in steps to try to trigger loading of more comments.
      for (let i = 0; i < 5; i++) {
        await page.evaluate(() => {
          window.scrollBy(0, 500);
        });
        await page.waitForTimeout(1000);
      }

      // Give in-flight API responses time to arrive.
      await page.waitForTimeout(3000);

      // Convert the captured comments into WorkComments entries.
      for (const [workId, comments] of capturedComments) {
        const workInfo = capturedWorks.find(w => w.workId === workId);
        allWorkComments.push({
          videoId: workId,
          videoTitle: workInfo?.title || `作品 ${workId.slice(0, 10)}`,
          videoCoverUrl: workInfo?.coverUrl || '',
          comments,
        });
      }

      // Nothing captured from the API: fall back to scraping the rendered page.
      if (allWorkComments.length === 0) {
        logger.info('[Weixin Video Comments] No comments from API, extracting from page...');
        const pageComments = await page.evaluate(() => {
          const result: Array<{
            commentId: string;
            authorName: string;
            authorAvatar: string;
            content: string;
            likeCount: number;
            commentTime: string;
          }> = [];
          const commentItems = document.querySelectorAll('[class*="comment-item"], [class*="comment-card"]');
          commentItems.forEach((item, index) => {
            try {
              const authorEl = item.querySelector('[class*="author"], [class*="name"]');
              const avatarEl = item.querySelector('img');
              const contentEl = item.querySelector('[class*="content"]');
              const timeEl = item.querySelector('[class*="time"]');
              const likeEl = item.querySelector('[class*="like"] span');
              result.push({
                commentId: `weixin_page_${index}`,
                authorName: authorEl?.textContent?.trim() || '',
                authorAvatar: avatarEl?.src || '',
                content: contentEl?.textContent?.trim() || '',
                likeCount: parseInt(likeEl?.textContent || '0', 10) || 0,
                commentTime: timeEl?.textContent?.trim() || '',
              });
            } catch { }
          });
          return result;
        });
        if (pageComments.length > 0) {
          allWorkComments.push({
            videoId: 'page_comments',
            videoTitle: '页面评论',
            videoCoverUrl: '',
            comments: pageComments.map(c => ({
              ...c,
              authorId: '',
            })),
          });
        }
      }

      page.off('response', weixinCommentResponseHandler);
      await page.close();
      await context.close();

      const totalComments = allWorkComments.reduce((sum, w) => sum + w.comments.length, 0);
      logger.info(`[Weixin Video Comments] Total: fetched ${totalComments} comments from ${allWorkComments.length} works`);
      return allWorkComments;
    } catch (error) {
      logger.error('[Weixin Video Comments] Error:', error);
      // Return whatever was collected before the failure.
      return allWorkComments;
    } finally {
      // Single cleanup point for every exit path; close errors are ignored.
      await browser.close().catch(() => undefined);
    }
  }
  4774. }
  /** Module-level `HeadlessBrowserService` instance, exported for use by other modules. */
  const headlessBrowserService = new HeadlessBrowserService();
  export { headlessBrowserService };