软著申请-源代码材料.txt 413 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728
  1. ===============================================================================
  2. 智媒通 - 计算机软件著作权申请
  3. 源代码材料(前60页+后60页)
  4. ===============================================================================
  5. 软件全称:智媒通 V1.0
  6. 软件简称:智媒通
  7. 版本号:V1.0
  8. 开发完成日期:2025年
  9. 首次发表日期:2025年
  10. 权利取得方式:原始取得
  11. 权利范围:全部权利
  12. 著作权人:(请填写单位或个人名称)
  13. ===============================================================================
  14. 代码说明
  15. ===============================================================================
  16. 本软件为多平台自媒体账号管理系统,支持抖音、快手、视频号、小红书、百家号等
  17. 主流平台,具备视频自动发布、评论统一管理、数据分析等功能。采用 Electron +
  18. Vue3 桌面客户端 + Express 后端 + Python 发布服务的架构。以下为系统核心源代码。
  19. ===============================================================================
  20. 第一部分:平台发布基类
  21. ===============================================================================
  22. ================================================================================
  23. 文件: server\python\platforms\base.py
  24. ================================================================================
  25. # -*- coding: utf-8 -*-
  26. """
  27. 平台发布基类
  28. 提供通用的发布接口和工具方法
  29. """
  30. import asyncio
  31. import json
  32. import os
  33. import uuid
  34. from abc import ABC, abstractmethod
  35. from dataclasses import dataclass, field
  36. from datetime import datetime
  37. from typing import List, Optional, Callable, Dict, Any
  38. from playwright.async_api import async_playwright, Browser, BrowserContext, Page
  39. @dataclass
  40. class PublishParams:
  41. """发布参数"""
  42. title: str
  43. video_path: str
  44. description: str = ""
  45. cover_path: Optional[str] = None
  46. tags: List[str] = field(default_factory=list)
  47. publish_date: Optional[datetime] = None
  48. location: str = "重庆市"
  49. def __post_init__(self):
  50. if not self.description:
  51. self.description = self.title
@dataclass
class PublishResult:
    """Outcome of a publish attempt on one platform."""
    success: bool
    platform: str
    video_id: str = ""
    video_url: str = ""
    message: str = ""
    error: str = ""
    need_captcha: bool = False  # whether a captcha must be solved
    captcha_type: str = ""  # captcha type: phone, slider, image
    screenshot_base64: str = ""  # page screenshot (Base64)
    page_url: str = ""  # URL of the current page
    status: str = ""  # status: uploading, processing, success, failed, need_captcha, need_action
  66. @dataclass
  67. class WorkItem:
  68. """作品数据"""
  69. work_id: str
  70. title: str
  71. cover_url: str = ""
  72. video_url: str = ""
  73. duration: int = 0 # 秒
  74. status: str = "published" # published, reviewing, rejected, draft
  75. publish_time: str = ""
  76. play_count: int = 0
  77. like_count: int = 0
  78. comment_count: int = 0
  79. share_count: int = 0
  80. collect_count: int = 0
  81. def to_dict(self) -> Dict[str, Any]:
  82. return {
  83. "work_id": self.work_id,
  84. "title": self.title,
  85. "cover_url": self.cover_url,
  86. "video_url": self.video_url,
  87. "duration": self.duration,
  88. "status": self.status,
  89. "publish_time": self.publish_time,
  90. "play_count": self.play_count,
  91. "like_count": self.like_count,
  92. "comment_count": self.comment_count,
  93. "share_count": self.share_count,
  94. "collect_count": self.collect_count,
  95. }
  96. @dataclass
  97. class CommentItem:
  98. """评论数据"""
  99. comment_id: str
  100. parent_comment_id: str
  101. work_id: str
  102. content: str
  103. author_id: str = ""
  104. author_name: str = ""
  105. author_avatar: str = ""
  106. like_count: int = 0
  107. reply_count: int = 0
  108. create_time: str = ""
  109. is_author: bool = False # 是否是作者的评论
  110. replies: List['CommentItem'] = field(default_factory=list)
  111. def to_dict(self) -> Dict[str, Any]:
  112. return {
  113. "comment_id": self.comment_id,
  114. "parent_comment_id": self.parent_comment_id,
  115. "work_id": self.work_id,
  116. "content": self.content,
  117. "author_id": self.author_id,
  118. "author_name": self.author_name,
  119. "author_avatar": self.author_avatar,
  120. "like_count": self.like_count,
  121. "reply_count": self.reply_count,
  122. "create_time": self.create_time,
  123. "is_author": self.is_author,
  124. "replies": [r.to_dict() for r in self.replies],
  125. }
  126. @dataclass
  127. class WorksResult:
  128. """作品列表结果"""
  129. success: bool
  130. platform: str
  131. works: List[WorkItem] = field(default_factory=list)
  132. total: int = 0
  133. has_more: bool = False
  134. next_page: Any = ""
  135. error: str = ""
  136. debug_info: str = "" # 调试信息
  137. def to_dict(self) -> Dict[str, Any]:
  138. return {
  139. "success": self.success,
  140. "platform": self.platform,
  141. "works": [w.to_dict() for w in self.works],
  142. "total": self.total,
  143. "has_more": self.has_more,
  144. "next_page": self.next_page,
  145. "error": self.error,
  146. "debug_info": self.debug_info,
  147. }
  148. @dataclass
  149. class CommentsResult:
  150. """评论列表结果"""
  151. success: bool
  152. platform: str
  153. work_id: str
  154. comments: List[CommentItem] = field(default_factory=list)
  155. total: int = 0
  156. has_more: bool = False
  157. error: str = ""
  158. def to_dict(self) -> Dict[str, Any]:
  159. return {
  160. "success": self.success,
  161. "platform": self.platform,
  162. "work_id": self.work_id,
  163. "comments": [c.to_dict() for c in self.comments],
  164. "total": self.total,
  165. "has_more": self.has_more,
  166. "error": self.error,
  167. }
  168. class BasePublisher(ABC):
  169. """
  170. 平台发布基类
  171. 所有平台发布器都需要继承此类
  172. """
  173. platform_name: str = "base"
  174. login_url: str = ""
  175. publish_url: str = ""
  176. cookie_domain: str = ""
    def __init__(self, headless: bool = True):
        """Create a publisher with no browser attached yet.

        Args:
            headless: stored and later passed to the Chromium launch call
                made by ``init_browser``.
        """
        self.headless = headless
        # Browser handles; assigned by init_browser().
        self.browser: Optional[Browser] = None
        self.context: Optional[BrowserContext] = None
        self.page: Optional[Page] = None
        # Optional (progress, message) callback invoked by report_progress().
        self.on_progress: Optional[Callable[[int, str], None]] = None
        # Identifiers included in the payloads sent to the internal Node API.
        self.user_id: Optional[int] = None
        self.publish_task_id: Optional[int] = None
        self.publish_account_id: Optional[int] = None
        # Fallback proxy settings used by init_browser() when none are passed in.
        self.proxy_config: Optional[Dict[str, Any]] = None
    def set_progress_callback(self, callback: Callable[[int, str], None]):
        """Register the progress callback.

        Args:
            callback: callable receiving ``(progress, message)``; it is invoked
                by ``report_progress``.
        """
        self.on_progress = callback
  190. def report_progress(self, progress: int, message: str):
  191. """报告进度"""
  192. print(f"[{self.platform_name}] [{progress}%] {message}")
  193. if self.on_progress:
  194. self.on_progress(progress, message)
  195. @staticmethod
  196. def parse_cookies(cookies_str: str) -> list:
  197. """解析 cookie 字符串为列表"""
  198. try:
  199. cookies = json.loads(cookies_str)
  200. if isinstance(cookies, list):
  201. return cookies
  202. except json.JSONDecodeError:
  203. pass
  204. # 字符串格式: name=value; name2=value2
  205. cookies = []
  206. for item in cookies_str.split(';'):
  207. item = item.strip()
  208. if '=' in item:
  209. name, value = item.split('=', 1)
  210. cookies.append({
  211. 'name': name.strip(),
  212. 'value': value.strip(),
  213. 'domain': '',
  214. 'path': '/'
  215. })
  216. return cookies
  217. @staticmethod
  218. def cookies_to_string(cookies: list) -> str:
  219. """将 cookie 列表转换为字符串"""
  220. return '; '.join([f"{c['name']}={c['value']}" for c in cookies])
  221. async def init_browser(self, storage_state: str = None, proxy_config: Dict[str, Any] = None):
  222. """初始化浏览器"""
  223. print(f"[{self.platform_name}] init_browser: headless={self.headless}", flush=True)
  224. playwright = await async_playwright().start()
  225. proxy = proxy_config or self.proxy_config
  226. if proxy and isinstance(proxy, dict) and proxy.get('server'):
  227. print(f"[{self.platform_name}] 使用代理: {proxy.get('server')}", flush=True)
  228. self.browser = await playwright.chromium.launch(headless=self.headless, proxy=proxy)
  229. else:
  230. self.browser = await playwright.chromium.launch(headless=self.headless)
  231. if storage_state and os.path.exists(storage_state):
  232. self.context = await self.browser.new_context(storage_state=storage_state)
  233. else:
  234. self.context = await self.browser.new_context()
  235. self.page = await self.context.new_page()
  236. return self.page
  237. async def set_cookies(self, cookies: list):
  238. """设置 cookies"""
  239. if not self.context:
  240. raise Exception("Browser context not initialized")
  241. # 设置默认域名
  242. for cookie in cookies:
  243. if 'domain' not in cookie or not cookie['domain']:
  244. cookie['domain'] = self.cookie_domain
  245. await self.context.add_cookies(cookies)
  246. async def close_browser(self):
  247. """关闭浏览器"""
  248. if self.context:
  249. await self.context.close()
  250. if self.browser:
  251. await self.browser.close()
  252. async def save_cookies(self, file_path: str):
  253. """保存 cookies 到文件"""
  254. if self.context:
  255. await self.context.storage_state(path=file_path)
  256. async def capture_screenshot(self) -> str:
  257. """截取当前页面截图,返回 Base64 编码"""
  258. import base64
  259. if not self.page:
  260. return ""
  261. try:
  262. screenshot_bytes = await self.page.screenshot(type="jpeg", quality=80)
  263. return base64.b64encode(screenshot_bytes).decode('utf-8')
  264. except Exception as e:
  265. print(f"[{self.platform_name}] 截图失败: {e}")
  266. return ""
  267. async def request_sms_code_from_frontend(self, phone: str = "", timeout_seconds: int = 120, message: str = "") -> str:
  268. node_api_url = os.environ.get('NODEJS_API_URL', 'http://localhost:3000').rstrip('/')
  269. internal_api_key = os.environ.get('INTERNAL_API_KEY', 'internal-api-key-default')
  270. if not self.user_id:
  271. raise Exception("缺少 user_id,无法请求前端输入验证码")
  272. captcha_task_id = f"py_{self.platform_name}_{uuid.uuid4().hex}"
  273. payload = {
  274. "user_id": self.user_id,
  275. "captcha_task_id": captcha_task_id,
  276. "type": "sms",
  277. "phone": phone or "",
  278. "message": message or "请输入短信验证码",
  279. "timeout_seconds": timeout_seconds,
  280. "publish_task_id": self.publish_task_id,
  281. "publish_account_id": self.publish_account_id,
  282. }
  283. import requests
  284. try:
  285. resp = requests.post(
  286. f"{node_api_url}/api/internal/captcha/request",
  287. headers={
  288. "Content-Type": "application/json",
  289. "X-Internal-API-Key": internal_api_key,
  290. },
  291. json=payload,
  292. timeout=timeout_seconds + 30,
  293. )
  294. except Exception as e:
  295. raise Exception(f"请求前端验证码失败: {e}")
  296. try:
  297. data = resp.json()
  298. except Exception:
  299. raise Exception(f"请求前端验证码失败: HTTP {resp.status_code}")
  300. if resp.status_code >= 400 or not data.get("success"):
  301. raise Exception(data.get("error") or data.get("message") or f"请求前端验证码失败: HTTP {resp.status_code}")
  302. code = data.get("code") or ""
  303. if not code:
  304. raise Exception("未收到验证码")
  305. return str(code)
  306. async def ai_analyze_sms_send_state(self, screenshot_base64: str = None) -> dict:
  307. import os
  308. import requests
  309. import json
  310. import re
  311. try:
  312. if not screenshot_base64:
  313. screenshot_base64 = await self.capture_screenshot()
  314. if not screenshot_base64:
  315. return {
  316. "has_sms_modal": False,
  317. "send_button_state": "unknown",
  318. "sent_likely": False,
  319. "block_reason": "unknown",
  320. "suggested_action": "manual_send",
  321. "confidence": 0,
  322. "notes": "无法获取截图",
  323. }
  324. ai_api_key = os.environ.get('DASHSCOPE_API_KEY', '')
  325. ai_base_url = os.environ.get('DASHSCOPE_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
  326. ai_vision_model = os.environ.get('AI_VISION_MODEL', 'qwen-vl-plus')
  327. if not ai_api_key:
  328. return {
  329. "has_sms_modal": True,
  330. "send_button_state": "unknown",
  331. "sent_likely": False,
  332. "block_reason": "no_ai_key",
  333. "suggested_action": "manual_send",
  334. "confidence": 0,
  335. "notes": "未配置 AI API Key",
  336. }
  337. prompt = """请分析这张网页截图,判断是否处于“短信验证码”验证弹窗/页面,并判断“发送验证码/获取验证码”是否已经触发成功。
  338. 你需要重点识别:
  339. 1) 是否存在短信验证码弹窗(包含“请输入验证码/短信验证码/手机号验证/获取验证码/发送验证码”等)
  340. 2) 发送按钮状态:enabled / disabled / countdown(出现xx秒) / hidden / unknown
  341. 3) 是否已发送成功:例如出现倒计时、按钮禁用、出现“已发送/重新发送/xx秒后重试”等
  342. 4) 是否被阻塞:例如出现滑块/人机验证、频繁发送、风控提示、网络异常等
  343. 请以 JSON 返回:
  344. ```json
  345. {
  346. "has_sms_modal": true,
  347. "send_button_state": "enabled|disabled|countdown|hidden|unknown",
  348. "sent_likely": true,
  349. "block_reason": "none|need_click_send|slider|risk|rate_limit|network|unknown",
  350. "suggested_action": "wait|click_send|solve_slider|manual_send",
  351. "confidence": 0-100,
  352. "notes": "一句话说明你看到的证据"
  353. }
  354. ```"""
  355. headers = {
  356. 'Authorization': f'Bearer {ai_api_key}',
  357. 'Content-Type': 'application/json'
  358. }
  359. payload = {
  360. "model": ai_vision_model,
  361. "messages": [
  362. {
  363. "role": "user",
  364. "content": [
  365. {
  366. "type": "image_url",
  367. "image_url": {
  368. "url": f"data:image/jpeg;base64,{screenshot_base64}"
  369. }
  370. },
  371. {
  372. "type": "text",
  373. "text": prompt
  374. }
  375. ]
  376. }
  377. ],
  378. "max_tokens": 500
  379. }
  380. response = requests.post(
  381. f"{ai_base_url}/chat/completions",
  382. headers=headers,
  383. json=payload,
  384. timeout=30
  385. )
  386. if response.status_code != 200:
  387. return {
  388. "has_sms_modal": True,
  389. "send_button_state": "unknown",
  390. "sent_likely": False,
  391. "block_reason": "network",
  392. "suggested_action": "manual_send",
  393. "confidence": 0,
  394. "notes": f"AI API 返回错误 {response.status_code}",
  395. }
  396. result = response.json()
  397. ai_response = result.get('choices', [{}])[0].get('message', {}).get('content', '')
  398. json_match = re.search(r'```json\\s*([\\s\\S]*?)\\s*```', ai_response)
  399. if json_match:
  400. json_str = json_match.group(1)
  401. else:
  402. json_match = re.search(r'\\{[\\s\\S]*\\}', ai_response)
  403. json_str = json_match.group(0) if json_match else '{}'
  404. try:
  405. data = json.loads(json_str)
  406. except Exception:
  407. data = {}
  408. return {
  409. "has_sms_modal": bool(data.get("has_sms_modal", True)),
  410. "send_button_state": data.get("send_button_state", "unknown"),
  411. "sent_likely": bool(data.get("sent_likely", False)),
  412. "block_reason": data.get("block_reason", "unknown"),
  413. "suggested_action": data.get("suggested_action", "manual_send"),
  414. "confidence": int(data.get("confidence", 0) or 0),
  415. "notes": data.get("notes", ""),
  416. }
  417. except Exception as e:
  418. return {
  419. "has_sms_modal": True,
  420. "send_button_state": "unknown",
  421. "sent_likely": False,
  422. "block_reason": "unknown",
  423. "suggested_action": "manual_send",
  424. "confidence": 0,
  425. "notes": f"AI 分析异常: {e}",
  426. }
  427. async def sync_cookies_to_node(self, cookies: list) -> bool:
  428. import os
  429. import json
  430. import requests
  431. if not self.user_id or not self.publish_account_id:
  432. return False
  433. node_api_url = os.environ.get('NODEJS_API_URL', 'http://localhost:3000').rstrip('/')
  434. internal_api_key = os.environ.get('INTERNAL_API_KEY', 'internal-api-key-default')
  435. try:
  436. payload = {
  437. "user_id": int(self.user_id),
  438. "account_id": int(self.publish_account_id),
  439. "cookies": json.dumps(cookies, ensure_ascii=False),
  440. }
  441. resp = requests.post(
  442. f"{node_api_url}/api/internal/accounts/update-cookies",
  443. headers={
  444. "Content-Type": "application/json",
  445. "X-Internal-API-Key": internal_api_key,
  446. },
  447. json=payload,
  448. timeout=30,
  449. )
  450. if resp.status_code >= 400:
  451. return False
  452. data = resp.json() if resp.content else {}
  453. return bool(data.get("success", True))
  454. except Exception:
  455. return False
  456. async def ai_suggest_playwright_selector(self, goal: str, screenshot_base64: str = None) -> dict:
  457. import os
  458. import requests
  459. import json
  460. import re
  461. try:
  462. if not screenshot_base64:
  463. screenshot_base64 = await self.capture_screenshot()
  464. if not screenshot_base64:
  465. return {"has_selector": False, "selector": "", "confidence": 0, "notes": "无法获取截图"}
  466. ai_api_key = os.environ.get('DASHSCOPE_API_KEY', '')
  467. ai_base_url = os.environ.get('DASHSCOPE_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
  468. ai_vision_model = os.environ.get('AI_VISION_MODEL', 'qwen-vl-plus')
  469. if not ai_api_key:
  470. return {"has_selector": False, "selector": "", "confidence": 0, "notes": "未配置 AI API Key"}
  471. prompt = f"""请分析这张网页截图,给出一个 Playwright Python 可用的 selector(用于 page.locator(selector))来完成目标操作。
  472. 目标:{goal}
  473. 要求:
  474. 1) selector 尽量稳定(优先 role/text/aria,其次 class,避免过度依赖随机 class)
  475. 2) selector 必须是 Playwright 支持的选择器语法(如:text="发布"、button:has-text("发布")、[role="button"]:has-text("发布") 等)
  476. 3) 只返回一个最优 selector
  477. 以 JSON 返回:
  478. ```json
  479. {{
  480. "has_selector": true,
  481. "selector": "button:has-text(\\"发布\\")",
  482. "confidence": 0-100,
  483. "notes": "你依据的页面证据"
  484. }}
  485. ```"""
  486. headers = {
  487. 'Authorization': f'Bearer {ai_api_key}',
  488. 'Content-Type': 'application/json'
  489. }
  490. payload = {
  491. "model": ai_vision_model,
  492. "messages": [
  493. {
  494. "role": "user",
  495. "content": [
  496. {
  497. "type": "image_url",
  498. "image_url": {
  499. "url": f"data:image/jpeg;base64,{screenshot_base64}"
  500. }
  501. },
  502. {
  503. "type": "text",
  504. "text": prompt
  505. }
  506. ]
  507. }
  508. ],
  509. "max_tokens": 300
  510. }
  511. response = requests.post(
  512. f"{ai_base_url}/chat/completions",
  513. headers=headers,
  514. json=payload,
  515. timeout=30
  516. )
  517. if response.status_code != 200:
  518. return {"has_selector": False, "selector": "", "confidence": 0, "notes": f"AI API 错误 {response.status_code}"}
  519. result = response.json()
  520. ai_response = result.get('choices', [{}])[0].get('message', {}).get('content', '')
  521. json_match = re.search(r'```json\\s*([\\s\\S]*?)\\s*```', ai_response)
  522. if json_match:
  523. json_str = json_match.group(1)
  524. else:
  525. json_match = re.search(r'\\{[\\s\\S]*\\}', ai_response)
  526. json_str = json_match.group(0) if json_match else '{}'
  527. try:
  528. data = json.loads(json_str)
  529. except Exception:
  530. data = {}
  531. selector = str(data.get("selector", "") or "").strip()
  532. has_selector = bool(data.get("has_selector", False)) and bool(selector)
  533. confidence = int(data.get("confidence", 0) or 0)
  534. notes = str(data.get("notes", "") or "")
  535. if not has_selector:
  536. return {"has_selector": False, "selector": "", "confidence": confidence, "notes": notes or "未给出 selector"}
  537. return {"has_selector": True, "selector": selector, "confidence": confidence, "notes": notes}
  538. except Exception as e:
  539. return {"has_selector": False, "selector": "", "confidence": 0, "notes": f"AI selector 异常: {e}"}
  540. async def ai_check_captcha(self, screenshot_base64: str = None) -> dict:
  541. """
  542. 使用 AI 分析截图检测验证码
  543. Args:
  544. screenshot_base64: 截图的 Base64 编码,如果为空则自动获取当前页面截图
  545. Returns:
  546. dict: {
  547. "has_captcha": bool, # 是否有验证码
  548. "captcha_type": str, # 验证码类型: slider, image, phone, rotate, puzzle
  549. "captcha_description": str, # 验证码描述
  550. "confidence": float, # 置信度 0-100
  551. "need_headful": bool # 是否需要切换到有头浏览器
  552. }
  553. """
  554. import os
  555. import requests
  556. try:
  557. # 获取截图
  558. if not screenshot_base64:
  559. screenshot_base64 = await self.capture_screenshot()
  560. if not screenshot_base64:
  561. print(f"[{self.platform_name}] AI验证码检测: 无法获取截图")
  562. return {
  563. "has_captcha": False,
  564. "captcha_type": "",
  565. "captcha_description": "",
  566. "confidence": 0,
  567. "need_headful": False
  568. }
  569. # 获取 AI 配置
  570. ai_api_key = os.environ.get('DASHSCOPE_API_KEY', '')
  571. ai_base_url = os.environ.get('DASHSCOPE_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
  572. ai_vision_model = os.environ.get('AI_VISION_MODEL', 'qwen-vl-plus')
  573. if not ai_api_key:
  574. print(f"[{self.platform_name}] AI验证码检测: 未配置 AI API Key,使用传统方式检测")
  575. return await self._traditional_captcha_check()
  576. # 构建 AI 请求
  577. prompt = """请分析这张网页截图,判断页面上是否存在验证码。
  578. 请检查以下类型的验证码:
  579. 1. 滑块验证码(需要滑动滑块到指定位置)
  580. 2. 图片验证码(需要选择正确的图片、点击图片上的文字等)
  581. 3. 旋转验证码(需要旋转图片到正确角度)
  582. 4. 拼图验证码(需要拖动拼图块到正确位置)
  583. 5. 手机验证码(需要输入手机收到的验证码)
  584. 6. 计算验证码(需要输入计算结果)
  585. 请以 JSON 格式返回结果:
  586. ```json
  587. {
  588. "has_captcha": true/false,
  589. "captcha_type": "slider/image/phone/rotate/puzzle/calculate/none",
  590. "captcha_description": "验证码的具体描述",
  591. "confidence": 0-100
  592. }
  593. ```
  594. 注意:
  595. - 如果页面有明显的验证码弹窗或验证区域,has_captcha 为 true
  596. - 如果只是普通的登录页面或表单,没有特殊的验证步骤,has_captcha 为 false
  597. - confidence 表示你对判断结果的信心,100 表示非常确定"""
  598. headers = {
  599. 'Authorization': f'Bearer {ai_api_key}',
  600. 'Content-Type': 'application/json'
  601. }
  602. payload = {
  603. "model": ai_vision_model,
  604. "messages": [
  605. {
  606. "role": "user",
  607. "content": [
  608. {
  609. "type": "image_url",
  610. "image_url": {
  611. "url": f"data:image/jpeg;base64,{screenshot_base64}"
  612. }
  613. },
  614. {
  615. "type": "text",
  616. "text": prompt
  617. }
  618. ]
  619. }
  620. ],
  621. "max_tokens": 500
  622. }
  623. print(f"[{self.platform_name}] AI验证码检测: 正在分析截图...")
  624. response = requests.post(
  625. f"{ai_base_url}/chat/completions",
  626. headers=headers,
  627. json=payload,
  628. timeout=30
  629. )
  630. if response.status_code != 200:
  631. print(f"[{self.platform_name}] AI验证码检测: API 返回错误 {response.status_code}")
  632. return await self._traditional_captcha_check()
  633. result = response.json()
  634. ai_response = result.get('choices', [{}])[0].get('message', {}).get('content', '')
  635. print(f"[{self.platform_name}] AI验证码检测响应: {ai_response[:200]}...")
  636. # 解析 AI 响应
  637. import re
  638. json_match = re.search(r'```json\s*([\s\S]*?)\s*```', ai_response)
  639. if json_match:
  640. json_str = json_match.group(1)
  641. else:
  642. # 尝试直接解析
  643. json_match = re.search(r'\{[\s\S]*\}', ai_response)
  644. if json_match:
  645. json_str = json_match.group(0)
  646. else:
  647. json_str = '{}'
  648. try:
  649. ai_result = json.loads(json_str)
  650. except:
  651. ai_result = {}
  652. has_captcha = ai_result.get('has_captcha', False)
  653. captcha_type = ai_result.get('captcha_type', '')
  654. captcha_description = ai_result.get('captcha_description', '')
  655. confidence = ai_result.get('confidence', 0)
  656. # 如果检测到验证码,需要切换到有头浏览器
  657. need_headful = has_captcha and captcha_type not in ['none', '']
  658. print(f"[{self.platform_name}] AI验证码检测结果: has_captcha={has_captcha}, type={captcha_type}, confidence={confidence}")
  659. return {
  660. "has_captcha": has_captcha,
  661. "captcha_type": captcha_type if captcha_type != 'none' else '',
  662. "captcha_description": captcha_description,
  663. "confidence": confidence,
  664. "need_headful": need_headful
  665. }
  666. except Exception as e:
  667. print(f"[{self.platform_name}] AI验证码检测异常: {e}")
  668. import traceback
  669. traceback.print_exc()
  670. return await self._traditional_captcha_check()
  671. async def _traditional_captcha_check(self) -> dict:
  672. """传统方式检测验证码(基于 DOM 元素)"""
  673. if not self.page:
  674. return {
  675. "has_captcha": False,
  676. "captcha_type": "",
  677. "captcha_description": "",
  678. "confidence": 0,
  679. "need_headful": False
  680. }
  681. try:
  682. # 检查常见的验证码选择器
  683. captcha_selectors = [
  684. # 滑块验证码
  685. ('[class*="slider"]', 'slider', '滑块验证码'),
  686. ('[class*="slide-verify"]', 'slider', '滑块验证码'),
  687. ('text="滑动"', 'slider', '滑块验证码'),
  688. ('text="拖动"', 'slider', '滑块验证码'),
  689. # 图片验证码
  690. ('[class*="captcha"]', 'image', '图片验证码'),
  691. ('[class*="verify-img"]', 'image', '图片验证码'),
  692. ('text="点击"', 'image', '图片验证码'),
  693. ('text="选择"', 'image', '图片验证码'),
  694. # 手机验证码
  695. ('text="验证码"', 'phone', '手机验证码'),
  696. ('text="获取验证码"', 'phone', '手机验证码'),
  697. ('[class*="sms-code"]', 'phone', '手机验证码'),
  698. # 旋转验证码
  699. ('text="旋转"', 'rotate', '旋转验证码'),
  700. ('[class*="rotate"]', 'rotate', '旋转验证码'),
  701. ]
  702. for selector, captcha_type, description in captcha_selectors:
  703. try:
  704. count = await self.page.locator(selector).count()
  705. if count > 0:
  706. # 检查是否可见
  707. element = self.page.locator(selector).first
  708. if await element.is_visible():
  709. print(f"[{self.platform_name}] 传统检测: 发现验证码 - {selector}")
  710. return {
  711. "has_captcha": True,
  712. "captcha_type": captcha_type,
  713. "captcha_description": description,
  714. "confidence": 80,
  715. "need_headful": True
  716. }
  717. except:
  718. pass
  719. return {
  720. "has_captcha": False,
  721. "captcha_type": "",
  722. "captcha_description": "",
  723. "confidence": 80,
  724. "need_headful": False
  725. }
  726. except Exception as e:
  727. print(f"[{self.platform_name}] 传统验证码检测异常: {e}")
  728. return {
  729. "has_captcha": False,
  730. "captcha_type": "",
  731. "captcha_description": "",
  732. "confidence": 0,
  733. "need_headful": False
  734. }
  735. async def get_page_url(self) -> str:
  736. """获取当前页面 URL"""
  737. if not self.page:
  738. return ""
  739. try:
  740. return self.page.url
  741. except:
  742. return ""
  async def check_publish_status(self) -> dict:
      """Classify the publish outcome from the current page's HTML.

      The page source is searched for marker phrases in a fixed order:
      success first, then captcha/verification, then failure. The first
      phrase found decides the result; when none is found the publish is
      reported as still processing.

      Returns:
          dict with the keys ``status`` (``"success"``, ``"need_captcha"``,
          ``"failed"``, ``"processing"`` or ``"error"``),
          ``screenshot_base64``, ``page_url`` and ``message``. Every return
          path carries all four keys; the error paths use empty strings
          for the screenshot and the URL.
      """
      if not self.page:
          return {
              "status": "error",
              "screenshot_base64": "",
              "page_url": "",
              "message": "页面未初始化",
          }
      try:
          screenshot = await self.capture_screenshot()
          page_url = await self.get_page_url()
          page_content = await self.page.content()

          def build(status: str, message: str) -> dict:
              return {
                  "status": status,
                  "screenshot_base64": screenshot,
                  "page_url": page_url,
                  "message": message,
              }

          # (status, marker phrases, message builder). Order matters: a page
          # that shows a success marker wins over captcha/failure markers.
          rules = (
              ("success",
               ('发布成功', '上传成功', '发表成功', '提交成功'),
               lambda keyword: "发布成功"),
              ("need_captcha",
               ('验证码', '身份验证', '请完成验证', '滑动验证', '图形验证'),
               lambda keyword: f"检测到{keyword}"),
              ("failed",
               ('发布失败', '上传失败', '提交失败', '操作失败'),
               lambda keyword: keyword),
          )
          for status, keywords, to_message in rules:
              for keyword in keywords:
                  if keyword in page_content:
                      return build(status, to_message(keyword))

          # No marker found: assume the platform is still working on it.
          return build("processing", "处理中")
      except Exception as e:
          return {
              "status": "error",
              "screenshot_base64": "",
              "page_url": "",
              "message": str(e),
          }
  async def wait_for_upload_complete(self, success_selector: str, timeout: int = 300):
      """Poll the page until ``success_selector`` matches or the time budget ends.

      The selector is checked once every 3 seconds. It is always checked at
      least once, even when ``timeout`` is smaller than the poll interval
      (previously such a timeout skipped the check entirely).

      Args:
          success_selector: selector whose presence signals a finished upload.
          timeout: approximate time budget in seconds.

      Returns:
          True as soon as the selector matches at least one element,
          False when the budget is used up.

      Raises:
          Exception: if no page has been initialized.
      """
      if not self.page:
          raise Exception("Page not initialized")

      poll_interval = 3
      attempts = max(1, timeout // poll_interval)
      for _ in range(attempts):
          try:
              if await self.page.locator(success_selector).count() > 0:
                  return True
          except Exception:
              # A failed lookup is treated as "not there yet" and retried.
              # ``Exception`` (not a bare ``except``) keeps task cancellation
              # and interpreter-exit signals working while polling.
              pass
          await asyncio.sleep(poll_interval)
          self.report_progress(30, "正在上传视频...")
      return False
  @abstractmethod
  async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
      """Publish a video. Every platform subclass must implement this.

      Args:
          cookies: cookie string or JSON.
          params: publish parameters.

      Returns:
          PublishResult: the outcome of the publish attempt.
      """
      ...
  async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
      """Fetch the list of published works. Subclasses may override this.

      The base implementation always reports that the platform does not
      support listing works.

      Args:
          cookies: cookie string or JSON.
          page: page index, starting at 0.
          page_size: number of items per page.

      Returns:
          WorksResult: the works list result.
      """
      unsupported = WorksResult(
          success=False,
          platform=self.platform_name,
          error="该平台暂不支持获取作品列表",
      )
      return unsupported
  async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
      """Fetch the comments of one work. Subclasses may override this.

      The base implementation always reports that the platform does not
      support fetching comments.

      Args:
          cookies: cookie string or JSON.
          work_id: identifier of the work.
          cursor: pagination cursor.

      Returns:
          CommentsResult: the comment list result.
      """
      unsupported_fields = dict(
          success=False,
          platform=self.platform_name,
          work_id=work_id,
          error="该平台暂不支持获取评论",
      )
      return CommentsResult(**unsupported_fields)
  async def run(self, cookies: str, params: PublishParams) -> PublishResult:
      """Run a publish task.

      Wraps ``publish`` with error handling and resource cleanup: any
      exception is printed and converted into a failed ``PublishResult``,
      and the browser is closed whether or not publishing succeeded.
      """
      import traceback

      try:
          outcome = await self.publish(cookies, params)
      except Exception as exc:
          traceback.print_exc()
          outcome = PublishResult(
              success=False,
              platform=self.platform_name,
              error=str(exc),
          )
      finally:
          await self.close_browser()
      return outcome
  async def run_get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
      """Run a "get works" task.

      Calls ``get_works``; any exception is printed and converted into a
      failed ``WorksResult``. The browser is closed in every case.
      """
      import traceback

      try:
          outcome = await self.get_works(cookies, page, page_size)
      except Exception as exc:
          traceback.print_exc()
          outcome = WorksResult(
              success=False,
              platform=self.platform_name,
              error=str(exc),
          )
      finally:
          await self.close_browser()
      return outcome
  async def run_get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
      """Run a "get comments" task.

      Calls ``get_comments``; any exception is printed and converted into a
      failed ``CommentsResult``. The browser is closed in every case.
      """
      import traceback

      try:
          outcome = await self.get_comments(cookies, work_id, cursor)
      except Exception as exc:
          traceback.print_exc()
          outcome = CommentsResult(
              success=False,
              platform=self.platform_name,
              work_id=work_id,
              error=str(exc),
          )
      finally:
          await self.close_browser()
      return outcome
  async def check_login_status(self, cookies: str) -> dict:
      """Check whether the cookies still give a logged-in session.

      Opens a browser, applies the cookies, visits ``self.login_url`` and
      then looks for signs of a login redirect/prompt or of a
      risk-control / captcha challenge, first in the resulting URL and then
      in the page itself. The browser is always closed before returning.

      Args:
          cookies: cookie string or JSON.

      Returns:
          dict with ``success``, ``valid``, ``need_login`` and ``message``.
          When the check itself fails, ``success`` is False and the dict
          also carries ``error`` (same text as ``message``).
      """
      import traceback

      async def first_present(selectors):
          """Return the first selector matching at least one element, else None."""
          for selector in selectors:
              try:
                  if await self.page.locator(selector).count() > 0:
                      return selector
              except Exception:
                  # A selector that cannot be evaluated counts as "absent".
                  # Not a bare ``except``: cancellation must still propagate.
                  continue
          return None

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          # Visit the platform's back-office home page.
          home_url = self.login_url
          print(f"[{self.platform_name}] 访问后台页面: {home_url}")
          await self.page.goto(home_url, wait_until='domcontentloaded', timeout=30000)
          # Fixed pause after navigation; presumably lets redirects and
          # pop-ups settle before inspection.
          await asyncio.sleep(3)

          current_url = self.page.url
          print(f"[{self.platform_name}] 当前 URL: {current_url}")
          lowered_url = current_url.lower()

          # URL fragments that indicate a login page.
          login_indicators = ['login', 'passport', 'signin', 'auth']
          need_login = any(indicator in lowered_url for indicator in login_indicators)

          # URL fragments that indicate risk control / captcha.
          risk_indicators = ['captcha', 'verify', 'challenge', 'risk', 'security', 'safe', 'protect', 'slider']
          need_verification = any(indicator in lowered_url for indicator in risk_indicators)

          if not need_login:
              # The URL looks fine; check the page for a login prompt.
              hit = await first_present([
                  'text="请先登录"',
                  'text="登录后继续"',
                  'text="请登录"',
                  '[class*="login-modal"]',
                  '[class*="login-dialog"]',
                  '[class*="login-popup"]',
              ])
              if hit is not None:
                  need_login = True
                  print(f"[{self.platform_name}] 检测到登录弹窗: {hit}")

          if not need_login and not need_verification:
              hit = await first_present([
                  'text="安全验证"',
                  'text="验证码"',
                  'text="人机验证"',
                  'text="滑块"',
                  'text="请完成验证"',
                  'text="系统检测到异常"',
                  'text="访问受限"',
                  'text="行为异常"',
              ])
              if hit is not None:
                  need_verification = True
                  print(f"[{self.platform_name}] 检测到风控/验证码提示: {hit}")

          if need_login:
              return {
                  "success": True,
                  "valid": False,
                  "need_login": True,
                  "message": "Cookie 已过期,需要重新登录"
              }
          if need_verification:
              return {
                  "success": True,
                  "valid": False,
                  "need_login": True,
                  "message": "触发风控/需要验证"
              }
          return {
              "success": True,
              "valid": True,
              "need_login": False,
              "message": "登录状态有效"
          }
      except Exception as e:
          traceback.print_exc()
          return {
              "success": False,
              "valid": False,
              "need_login": True,
              # "message" is part of the documented result shape; "error"
              # is kept for callers that already read it.
              "message": str(e),
              "error": str(e)
          }
      finally:
          await self.close_browser()
  1009. ================================================================================
  1010. 文件: server\python\app.py
  1011. ================================================================================
  1012. #!/usr/bin/env python3
  1013. # -*- coding: utf-8 -*-
  1014. """
  1015. 智媒通视频发布服务 - 统一入口
  1016. 支持平台: 抖音、小红书、视频号、快手
  1017. 参考项目: matrix (https://github.com/kebenxiaoming/matrix)
  1018. 使用方式:
  1019. python app.py # 启动 HTTP 服务 (端口 5005)
  1020. python app.py --port 8080 # 指定端口
  1021. python app.py --headless false # 显示浏览器窗口
  1022. """
  1023. import asyncio
  1024. import os
  1025. import sys
  1026. import argparse
  1027. import random
  1028. import re
  1029. import time
  1030. # Request unbuffered output so print() shows up immediately. NOTE(review): PYTHONUNBUFFERED is normally read at interpreter startup, so setting it here presumably only affects child processes - confirm.
  1031. os.environ['PYTHONUNBUFFERED'] = '1'
  1032. # Fix garbled Chinese output in Windows terminals by re-wrapping stdout/stderr as UTF-8.
  1033. if sys.platform == 'win32':
  1034. import io
  1035. sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace', line_buffering=True)
  1036. sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace', line_buffering=True)
  1037. # Set the I/O encoding environment variable.
  1038. os.environ['PYTHONIOENCODING'] = 'utf-8'
  1039. import traceback
  1040. import requests
  1041. from datetime import datetime, date
  1042. from pathlib import Path
  1043. # Make sure the directory containing this file is on the Python import path.
  1044. CURRENT_DIR = Path(__file__).parent.resolve()
  1045. if str(CURRENT_DIR) not in sys.path:
  1046. sys.path.insert(0, str(CURRENT_DIR))
  1047. # 从 server/.env 文件加载环境变量
  def load_env_file(env_path=None):
      """Load ``KEY=VALUE`` pairs from a ``.env`` file into ``os.environ``.

      Blank lines, lines starting with ``#`` and lines without ``=`` are
      ignored. One pair of matching surrounding quotes is removed from the
      value. Variables already present in the environment are never
      overwritten. Each loaded variable is printed, with the value masked
      when the key name looks sensitive.

      Args:
          env_path: path of the file to read. Defaults to ``.env`` in the
              parent of this script's directory (``CURRENT_DIR.parent``).
      """
      if env_path is None:
          env_path = CURRENT_DIR.parent / '.env'
      env_path = Path(env_path)

      if not env_path.exists():
          print(f"[Config] .env file not found: {env_path}")
          return

      print(f"[Config] Loading env from: {env_path}")
      sensitive_markers = ('PASSWORD', 'SECRET', 'TOKEN', 'KEY', 'ENCRYPT')
      # "utf-8-sig" drops a leading byte-order mark (common in files saved
      # by Windows editors); with plain "utf-8" the BOM would become part
      # of the first key.
      with open(env_path, 'r', encoding='utf-8-sig') as f:
          for line in f:
              line = line.strip()
              if not line or line.startswith('#') or '=' not in line:
                  continue
              key, value = line.split('=', 1)
              key = key.strip()
              value = value.strip()
              if not key:
                  # "=value" has no usable name; os.environ rejects it.
                  continue
              # Remove surrounding quotes.
              if value.startswith('"') and value.endswith('"'):
                  value = value[1:-1]
              elif value.startswith("'") and value.endswith("'"):
                  value = value[1:-1]
              # Only load variables that are not already set.
              if key in os.environ:
                  continue
              os.environ[key] = value
              safe_key = key.upper()
              is_sensitive = any(p in safe_key for p in sensitive_markers)
              print(f"[Config] Loaded: {key}=***" if is_sensitive else f"[Config] Loaded: {key}={value}")
  1073. # Load environment variables from the .env file before the imports and configuration below run.
  1074. load_env_file()
  1075. from flask import Flask, request, jsonify
  1076. from flask_cors import CORS
  1077. from platforms import get_publisher, PLATFORM_MAP
  1078. from platforms.base import PublishParams
  1079. from platforms.weixin import WeixinPublisher
  def parse_datetime(date_str: str):
      """Parse a date-time string using a fixed list of accepted formats.

      Accepted formats: ``YYYY-MM-DD HH:MM[:SS]``, ``YYYY/MM/DD HH:MM[:SS]``,
      ``YYYY-MM-DDTHH:MM:SS`` and ``YYYY-MM-DDTHH:MM:SSZ``. Leading and
      trailing whitespace is ignored.

      Args:
          date_str: the text to parse; may be empty or None.

      Returns:
          A naive ``datetime`` (a trailing ``Z`` is matched literally, no
          timezone is attached), or None when the input is empty or
          matches none of the formats.
      """
      if not date_str:
          return None
      if isinstance(date_str, str):
          # strptime rejects surrounding whitespace, which would otherwise
          # turn a valid timestamp into "no schedule".
          date_str = date_str.strip()
          if not date_str:
              return None

      formats = (
          "%Y-%m-%d %H:%M:%S",
          "%Y-%m-%d %H:%M",
          "%Y/%m/%d %H:%M:%S",
          "%Y/%m/%d %H:%M",
          "%Y-%m-%dT%H:%M:%S",
          "%Y-%m-%dT%H:%M:%SZ",
      )
      for fmt in formats:
          try:
              return datetime.strptime(date_str, fmt)
          except ValueError:
              continue
      return None
  def _extract_ip_ports(text: str):
      """Return the distinct ``ip:port`` tokens found in ``text``.

      Tokens are returned in order of first appearance. An empty or None
      input yields an empty list.
      """
      if not text:
          return []
      found = re.findall(r'\b(?:\d{1,3}\.){3}\d{1,3}:\d{2,5}\b', text)
      # dict.fromkeys keeps first-seen order while dropping repeats.
      return list(dict.fromkeys(found))
  def _mask_ip_port(ip_port: str) -> str:
      """Mask the last octet of an ``a.b.c.d:port`` string for logging.

      Returns ``a.b.c.***:port`` when the input has a colon and a
      four-part dotted host; anything else (including non-string input)
      yields ``'***'``.
      """
      masked = '***'
      try:
          host, colon, port = ip_port.partition(':')
          octets = host.split('.')
          if colon and len(octets) == 4:
              masked = '.'.join(octets[:3]) + f".***:{port}"
      except Exception:
          pass
      return masked
  def _build_requests_proxy_meta(host: str, port: int, username: str = '', password: str = '') -> str:
      """Build the ``http://`` proxy URL used for the ``requests`` proxies mapping.

      Credentials are included only when both ``username`` and ``password``
      are given. They are percent-encoded so that characters such as ``@``,
      ``:`` or ``/`` cannot break the URL structure.

      Args:
          host: proxy host; surrounding whitespace is stripped.
          port: proxy port; converted with ``int()``.
          username: optional proxy user name.
          password: optional proxy password.

      Returns:
          ``http://user:pass@host:port`` or ``http://host:port``.

      Raises:
          ValueError / TypeError: if ``port`` cannot be converted to int.
      """
      from urllib.parse import quote

      host = str(host).strip()
      port = int(port)
      if username and password:
          user = quote(str(username), safe='')
          secret = quote(str(password), safe='')
          return f"http://{user}:{secret}@{host}:{port}"
      return f"http://{host}:{port}"
  def _test_proxy_connectivity(test_url: str, host: str, port: int, username: str = '', password: str = '', timeout: int = 10) -> bool:
      """Check that ``test_url`` can be fetched through the given HTTP proxy.

      The request ignores proxy settings from the environment
      (``trust_env = False``) so that only the proxy under test is used.
      The outcome is printed with the address masked.

      Args:
          test_url: URL to fetch through the proxy.
          host: proxy host.
          port: proxy port.
          username: optional proxy user name.
          password: optional proxy password.
          timeout: request timeout in seconds.

      Returns:
          True when the request completes without raising, otherwise False.
          NOTE(review): the HTTP status code is not inspected, so an error
          response relayed by the proxy still counts as success - confirm
          that this is intended.
      """
      proxy_meta = _build_requests_proxy_meta(host, port, username, password)
      proxies = {"http": proxy_meta, "https": proxy_meta}
      masked = _mask_ip_port(f"{host}:{port}")
      # The monotonic clock is unaffected by system clock adjustments.
      started = time.monotonic()
      try:
          # The context manager releases the session's pooled connections.
          with requests.Session() as session:
              session.trust_env = False
              resp = session.get(test_url, proxies=proxies, timeout=timeout)
              _ = resp.text  # force the body to be read through the proxy
      except Exception as e:
          print(f"[Proxy] test failed: {masked} err={type(e).__name__}", flush=True)
          return False
      cost = int(round((time.monotonic() - started) * 1000))
      print(f"[Proxy] test ok: {masked} cost={cost}ms", flush=True)
      return True
  1140. _PROXY_CACHE_TTL_SECONDS = 20 * 60  # lifetime of a cached proxy entry, in seconds (20 minutes)
  1141. _resolved_proxy_cache = {}  # cache key -> {'server': ..., 'expire_at': ...}, filled by _resolve_shenlong_proxy
  def _resolve_shenlong_proxy(proxy_payload: dict) -> dict:
      """Obtain a working HTTP proxy from the Shenlong proxy API.

      Steps:
        1. Read and validate the settings in ``proxy_payload``.
        2. When a ``publish_task_id`` is given, reuse an unexpired cached
           proxy for the same task / area / ISP.
        3. Otherwise call the extraction API, collect ``ip:port``
           candidates from the JSON (or plain-text) response, and probe up
           to 10 of them in random order.
        4. Return the first candidate that passes the connectivity test and
           cache it when a cache key exists.

      Args:
          proxy_payload: dict that may contain ``city``, ``regionCode``,
              ``apiUrl``, ``productKey``, ``signature``, ``isp`` and
              ``publish_task_id``. ``productKey`` and ``signature`` are
              required. ``city`` is only used in the log line.

      Returns:
          ``{'server': 'http://host:port'}``.

      Raises:
          Exception: when a required setting is missing, the API reports an
              error, no candidate is returned, or no candidate is usable.
      """
      test_url = 'http://myip.ipip.net'

      def text_field(name: str) -> str:
          return str(proxy_payload.get(name) or '').strip()

      city = text_field('city')
      region_code = text_field('regionCode')
      api_url = text_field('apiUrl')
      product_key = text_field('productKey')
      signature = text_field('signature')
      isp = text_field('isp')
      publish_task_id = text_field('publish_task_id')

      if not product_key:
          raise Exception('缺少神龙产品Key')
      if not signature:
          raise Exception('缺少神龙签名')

      # Normalize 6-digit numeric region codes: a code ending in "0000" is
      # dropped (no area filter); any other code not ending in "00" has its
      # last two digits replaced with "00". Presumably this maps
      # province-level codes to "no filter" and district-level codes to
      # their city - confirm against the provider's area table.
      if region_code and region_code.isdigit() and len(region_code) == 6:
          if region_code.endswith('0000'):
              region_code = ''
          elif not region_code.endswith('00'):
              region_code = region_code[:4] + '00'

      cache_key = ''
      if publish_task_id:
          cache_key = f"publish_task:{publish_task_id}:area:{region_code or '-'}:isp:{isp or '-'}"
          now = int(time.time())
          cached = _resolved_proxy_cache.get(cache_key)
          if isinstance(cached, dict) and cached.get('expire_at', 0) > now and cached.get('server'):
              server = str(cached.get('server') or '').strip()
              if server:
                  print(f"[Proxy] cache hit: task={publish_task_id} area={region_code or '-'} isp={isp or '-'}", flush=True)
                  return {'server': server}

      request_url = api_url or 'http://api.shenlongip.com/ip'
      params = {
          'key': product_key,
          'sign': signature,
          'count': 1,
          'pattern': 'json',
          'mr': 1,
      }
      if region_code:
          params['area'] = region_code
      if isp:
          params['isp'] = isp

      payload = None
      # The context manager closes the session's connections once the
      # response has been fully read.
      with requests.Session() as session:
          session.trust_env = False
          resp = session.get(
              request_url,
              params=params,
              headers={
                  'User-Agent': 'Mozilla/5.0',
                  'Accept': 'application/json',
              },
              timeout=15,
          )
          content_type = (resp.headers.get('content-type') or '').lower()
          raw_text = resp.text or ''
          status_code = resp.status_code
          try:
              if 'application/json' in content_type or raw_text.strip().startswith(('{', '[')):
                  payload = resp.json()
          except Exception:
              payload = None

      if isinstance(payload, dict) and payload.get('code') is not None:
          try:
              api_code = int(payload.get('code'))
          except Exception:
              api_code = -1
          if api_code != 200:
              raise Exception(f"代理提取失败: code={api_code} msg={str(payload.get('msg') or '').strip() or 'unknown'}")
      elif status_code >= 400:
          raise Exception(f"代理提取失败: HTTP {status_code}")

      def collect_ip_ports(data_list, city_filter: str):
          """Collect ``ip:port`` strings from a list of strings and/or dicts."""
          ips = []
          for item in data_list:
              if isinstance(item, str):
                  ips.extend(_extract_ip_ports(item))
                  continue
              if not isinstance(item, dict):
                  continue
              item_city = str(item.get('city') or item.get('area') or '').strip()
              if city_filter and item_city and item_city != city_filter:
                  continue
              ip = str(item.get('ip') or item.get('host') or item.get('proxy_ip') or '').strip()
              port = str(item.get('port') or item.get('proxy_port') or '').strip()
              if ip and port:
                  ips.append(f"{ip}:{port}")
              proxy = str(item.get('proxy') or item.get('ip_port') or '').strip()
              if proxy:
                  ips.extend(_extract_ip_ports(proxy))
          return ips

      ip_ports = []
      if payload is not None:
          if isinstance(payload, dict):
              if isinstance(payload.get('data'), list):
                  ip_ports = collect_ip_ports(payload.get('data'), '')
              elif isinstance(payload.get('list'), list):
                  ip_ports = collect_ip_ports(payload.get('list'), '')
              elif payload.get('ip') and payload.get('port'):
                  ip_ports = collect_ip_ports([payload], '')
          elif isinstance(payload, list):
              ip_ports = collect_ip_ports(payload, '')
      else:
          # Not JSON: scan the raw body for ip:port tokens.
          ip_ports = _extract_ip_ports(raw_text)

      # The same address can be reported through several fields of one
      # item; drop repeats so no probe (up to 10 s each) is wasted.
      ip_ports = list(dict.fromkeys(ip_ports))
      if not ip_ports:
          raise Exception('代理提取结果为空')

      random.shuffle(ip_ports)
      candidates = ip_ports[: min(10, len(ip_ports))]
      print(f"[Proxy] shenlong resolved: city={city or '-'} area={region_code or '-'} candidates={len(candidates)}/{len(ip_ports)}", flush=True)

      for ip_port in candidates:
          try:
              host, port_str = ip_port.split(':', 1)
              port = int(port_str)
          except Exception:
              continue
          if _test_proxy_connectivity(test_url, host, port, timeout=10):
              server = f"http://{host}:{port}"
              if cache_key:
                  now = int(time.time())
                  # Evict expired entries so the per-task cache cannot grow
                  # without bound in a long-running process.
                  stale_keys = [
                      key for key, entry in _resolved_proxy_cache.items()
                      if not isinstance(entry, dict) or entry.get('expire_at', 0) <= now
                  ]
                  for key in stale_keys:
                      _resolved_proxy_cache.pop(key, None)
                  _resolved_proxy_cache[cache_key] = {
                      'server': server,
                      'expire_at': now + _PROXY_CACHE_TTL_SECONDS,
                  }
                  print(f"[Proxy] cache set: task={publish_task_id} ttl={_PROXY_CACHE_TTL_SECONDS}s", flush=True)
              return {'server': server}

      raise Exception('未找到可用代理IP')
  def validate_video_file(video_path: str) -> bool:
      """Tell whether ``video_path`` points to a plausible video file.

      The path must be non-empty, exist, be a regular file, carry one of
      the accepted video extensions (case-insensitive) and be at least
      1024 bytes long.
      """
      accepted_suffixes = ('.mp4', '.mov', '.avi', '.mkv', '.flv', '.wmv', '.webm')
      min_size_bytes = 1024

      if not video_path or not os.path.exists(video_path) or not os.path.isfile(video_path):
          return False
      _, suffix = os.path.splitext(video_path)
      return suffix.lower() in accepted_suffixes and os.path.getsize(video_path) >= min_size_bytes
  1280. # Create the Flask application and enable CORS on it.
  1281. app = Flask(__name__)
  1282. CORS(app)
  1283. # Configure logging so that all HTTP requests are shown.
  1284. import logging
  1285. logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  1286. # Make werkzeug's log records visible at INFO level.
  1287. werkzeug_logger = logging.getLogger('werkzeug')
  1288. werkzeug_logger.setLevel(logging.INFO)
  1289. # Add a StreamHandler so records reach the console via stdout. NOTE(review): the werkzeug logger presumably still propagates to the root handler installed by basicConfig, so request lines may be emitted twice - confirm.
  1290. handler = logging.StreamHandler(sys.stdout)
  1291. handler.setLevel(logging.INFO)
  1292. werkzeug_logger.addHandler(handler)
  1293. logging.getLogger('urllib3').setLevel(logging.WARNING)
  1294. # 添加请求钩子,打印所有收到的请求
  @app.before_request
  def log_request_info():
      """Print a short summary of each incoming request before it is handled.

      Logs the method, path and remote address. For JSON requests whose
      body is a non-empty JSON object, only the top-level key names are
      logged, so values (cookies, titles, ...) stay out of the log.
      """
      print(f"\n{'='*60}", flush=True)
      print(f"[HTTP Request] {request.method} {request.path}", flush=True)
      print(f"[HTTP Request] From: {request.remote_addr}", flush=True)
      if request.content_type and 'json' in request.content_type:
          try:
              # silent=True yields None instead of raising on a bad body.
              data = request.get_json(silent=True)
          except Exception:
              # Logging must never block the request itself.
              data = None
          # Only JSON objects have keys; arrays and scalars are skipped
          # explicitly instead of relying on a swallowed AttributeError.
          if isinstance(data, dict) and data:
              keys = list(data)
              print(f"[HTTP Request] JSON keys: {keys}", flush=True)
      print(f"{'='*60}\n", flush=True)
  1311. # Global configuration: the browser runs headless unless the HEADLESS variable is set to something other than "true" (case-insensitive).
  1312. HEADLESS_MODE = os.environ.get('HEADLESS', 'true').lower() == 'true'
  1313. print(f"[Config] HEADLESS env value: '{os.environ.get('HEADLESS', 'NOT SET')}'", flush=True)
  1314. print(f"[Config] HEADLESS_MODE: {HEADLESS_MODE}", flush=True)
  1315. # Node.js API configuration. NOTE(review): INTERNAL_API_KEY falls back to a hard-coded default when the variable is unset - confirm this is acceptable outside development.
  1316. NODEJS_API_BASE_URL = os.environ.get('NODEJS_API_URL', 'http://localhost:3000')
  1317. INTERNAL_API_KEY = os.environ.get('INTERNAL_API_KEY', 'internal-api-key-default')
  1318. print(f"[API Config] Node.js API: {NODEJS_API_BASE_URL}", flush=True)
  class NodeApiError(Exception):
      """Carries the status code and body of a failed Node internal API call.

      The exception message is the payload's ``error`` value, else its
      ``message`` value, else the fixed text ``"Node API error"``.

      Attributes are set in ``__init__``:
          status_code: HTTP status code to report to the caller.
          payload: the response body (a dict) describing the failure.
      """

      def __init__(self, status_code: int, payload: dict):
          summary = payload.get("error") or payload.get("message") or "Node API error"
          super().__init__(summary)
          self.status_code, self.payload = status_code, payload
  def call_nodejs_api(method: str, endpoint: str, data: dict = None, params: dict = None) -> dict:
      """Call the Node.js internal API and return its decoded JSON body.

      Args:
          method: ``"GET"`` or ``"POST"`` (case-insensitive).
          endpoint: path appended to ``<base>/api/internal``.
          data: JSON body for POST requests.
          params: query parameters for GET requests.

      Returns:
          The decoded response body. A body that is not valid JSON is
          replaced by a dict describing the raw response.

      Raises:
          ValueError: for any method other than GET or POST.
          NodeApiError: when Node answers with status >= 400 (real status
              and body attached) or cannot be reached at all (status 502).
      """
      url = f"{NODEJS_API_BASE_URL}/api/internal{endpoint}"
      headers = {
          'Content-Type': 'application/json',
          'X-Internal-API-Key': INTERNAL_API_KEY,
      }
      try:
          verb = method.upper()
          if verb == 'GET':
              response = requests.get(url, headers=headers, params=params, timeout=30)
          elif verb == 'POST':
              response = requests.post(url, headers=headers, json=data, timeout=30)
          else:
              raise ValueError(f"Unsupported HTTP method: {method}")

          status = response.status_code
          # Tolerate a non-JSON answer from Node.
          try:
              payload = response.json()
          except Exception:
              payload = {
                  "success": False,
                  "error": "Node.js API 返回非 JSON 响应",
                  "status": status,
                  "text": (response.text or "")[:2000],
                  "url": url,
                  "endpoint": endpoint,
              }

          if status < 400:
              return payload

          # Surface the real status code and body; the route decides how to
          # present them to the front end.
          if isinstance(payload, dict):
              payload.setdefault("success", False)
              payload.setdefault("status", status)
              payload.setdefault("url", url)
              payload.setdefault("endpoint", endpoint)
              error_body = payload
          else:
              error_body = {
                  "success": False,
                  "error": "Node.js API 调用失败",
                  "status": status,
                  "data": payload,
                  "url": url,
                  "endpoint": endpoint,
              }
          raise NodeApiError(status, error_body)
      except requests.exceptions.RequestException as e:
          # Connection failure / timeout: there is usually no response.
          print(f"[API Error] 调用 Node.js API 失败: {e}", flush=True)
          raise NodeApiError(502, {
              "success": False,
              "error": f"无法连接 Node.js API: {str(e)}",
              "status": 502,
              "url": url,
              "endpoint": endpoint,
          })
  1377. # ==================== 签名相关(小红书专用) ====================
  @app.route("/sign", methods=["POST"])
  def sign_endpoint():
      """Xiaohongshu signing endpoint.

      Expects a JSON object with ``uri``, ``data``, ``a1`` and
      ``web_session`` and returns whatever
      ``XiaohongshuPublisher.get_sign`` produces.

      Responses:
          200 with the signing result;
          400 when the body is missing or is not a JSON object;
          500 with ``{"error": ...}`` when signing raises.
      """
      try:
          from platforms.xiaohongshu import XiaohongshuPublisher

          # silent=True returns None for a missing/invalid body, so bad
          # input becomes a clear 400 instead of an AttributeError -> 500.
          data = request.get_json(silent=True)
          if not isinstance(data, dict):
              return jsonify({"error": "请求体必须是 JSON 对象"}), 400

          publisher = XiaohongshuPublisher(headless=True)
          result = asyncio.run(publisher.get_sign(
              data.get("uri", ""),
              data.get("data"),
              data.get("a1", ""),
              data.get("web_session", "")
          ))
          return jsonify(result)
      except Exception as e:
          traceback.print_exc()
          return jsonify({"error": str(e)}), 500
  1395. # ==================== 统一发布接口 ====================
  @app.route("/publish", methods=["POST"])
  def publish_video():
      """Unified publish endpoint: publish one video to one platform.

      Request body (JSON object):
          platform: "douyin" | "xiaohongshu" | "weixin" | "kuaishou"
          cookie: cookie string or JSON
          title: video title
          description: video description (optional)
          video_path: absolute path of the video file
          cover_path: absolute path of the cover image (optional)
          tags: list of topics, e.g. ["topic1", "topic2"]
          post_time: scheduled publish time (optional, e.g. "2024-01-20 12:00:00")
          location: location (optional, defaults to "重庆市")
          proxy: optional proxy settings; when ``enabled`` and the provider
              is "shenlong" a proxy is resolved before publishing
          publish_task_id: optional id passed on to the proxy resolver

      Response (JSON): ``success``, ``platform``, ``video_id``,
      ``video_url``, ``message``, ``error``, ``need_captcha``,
      ``captcha_type``, ``screenshot_base64``, ``page_url``, ``status``.

      Status codes: 400 for a missing/invalid body or parameter, 500 when
      an unexpected exception occurs, otherwise 200.
      """
      try:
          # silent=True returns None for a missing/invalid body, so bad
          # input becomes a clear 400 instead of an AttributeError -> 500.
          data = request.get_json(silent=True)
          if not isinstance(data, dict):
              print("[Publish] 错误: 请求体不是 JSON 对象")
              return jsonify({"success": False, "error": "请求体必须是 JSON 对象"}), 400

          # Read parameters. ``or ""`` also covers an explicit JSON null.
          platform = str(data.get("platform") or "").lower()
          cookie_str = data.get("cookie", "")
          title = data.get("title", "")
          description = data.get("description", "")
          video_path = data.get("video_path", "")
          cover_path = data.get("cover_path")
          tags = data.get("tags", [])
          post_time = data.get("post_time")
          location = data.get("location", "重庆市")

          # Debug log.
          print(f"[Publish] 收到请求: platform={platform}, title={title}, video_path={video_path}")

          # Parameter validation.
          if not platform:
              print("[Publish] 错误: 缺少 platform 参数")
              return jsonify({"success": False, "error": "缺少 platform 参数"}), 400
          if platform not in PLATFORM_MAP:
              print(f"[Publish] 错误: 不支持的平台 {platform}")
              return jsonify({
                  "success": False,
                  "error": f"不支持的平台: {platform},支持: {list(PLATFORM_MAP.keys())}"
              }), 400
          if not cookie_str:
              print("[Publish] 错误: 缺少 cookie 参数")
              return jsonify({"success": False, "error": "缺少 cookie 参数"}), 400
          if not title:
              print("[Publish] 错误: 缺少 title 参数")
              return jsonify({"success": False, "error": "缺少 title 参数"}), 400
          if not video_path:
              print("[Publish] 错误: 缺少 video_path 参数")
              return jsonify({"success": False, "error": "缺少 video_path 参数"}), 400

          # Video file validation, with a specific message per failure.
          if not os.path.exists(video_path):
              print(f"[Publish] 错误: 视频文件不存在: {video_path}")
              return jsonify({"success": False, "error": f"视频文件不存在: {video_path}"}), 400
          if not os.path.isfile(video_path):
              print(f"[Publish] 错误: 路径不是文件: {video_path}")
              return jsonify({"success": False, "error": f"路径不是文件: {video_path}"}), 400

          # Parse the scheduled publish time.
          publish_date = parse_datetime(post_time) if post_time else None
          if post_time and publish_date is None:
              # The request still proceeds as an immediate publish (the
              # existing behavior); the warning makes the dropped schedule
              # visible in the log.
              print(f"[Publish] 警告: 无法解析 post_time={post_time!r},将按立即发布处理")

          # Build the publish parameters.
          params = PublishParams(
              title=title,
              video_path=video_path,
              description=description,
              cover_path=cover_path,
              tags=tags,
              publish_date=publish_date,
              location=location
          )

          print("=" * 60)
          print(f"[Publish] 平台: {platform}")
          print(f"[Publish] 标题: {title}")
          print(f"[Publish] 视频: {video_path}")
          print(f"[Publish] 封面: {cover_path}")
          print(f"[Publish] 话题: {tags}")
          print(f"[Publish] 定时: {publish_date}")
          print("=" * 60)

          # Instantiate the publisher for the requested platform.
          PublisherClass = get_publisher(platform)
          publisher = PublisherClass(headless=HEADLESS_MODE)

          proxy_payload = data.get('proxy')
          if isinstance(proxy_payload, dict) and proxy_payload.get('enabled'):
              provider = str(proxy_payload.get('provider') or 'shenlong').strip().lower()
              if provider == 'shenlong':
                  # Copy so the request data is not mutated when the task
                  # id is attached for the resolver.
                  proxy_payload_with_task = dict(proxy_payload)
                  if data.get('publish_task_id') is not None:
                      proxy_payload_with_task['publish_task_id'] = data.get('publish_task_id')
                  publisher.proxy_config = _resolve_shenlong_proxy(proxy_payload_with_task)

          # Run the publish.
          result = asyncio.run(publisher.run(cookie_str, params))

          response_data = {
              "success": result.success,
              "platform": result.platform,
              "video_id": result.video_id,
              "video_url": result.video_url,
              "message": result.message,
              "error": result.error,
              "need_captcha": result.need_captcha,
              "captcha_type": result.captcha_type,
              "screenshot_base64": result.screenshot_base64,
              "page_url": result.page_url,
              "status": result.status
          }

          # Log clearly when a captcha is required.
          if result.need_captcha:
              print(f"[Publish] 需要验证码: type={result.captcha_type}")

          return jsonify(response_data)
      except Exception as e:
          traceback.print_exc()
          return jsonify({"success": False, "error": str(e)}), 500
  1514. # ==================== AI 辅助发布接口 ====================
  1515. # Stores active publish sessions.
  1516. active_publish_sessions = {}
  @app.route("/publish/ai-assisted", methods=["POST"])
  def publish_ai_assisted():
      """AI-assisted publish endpoint.

      Differences from the plain publish endpoint:
        1. A page screenshot can be returned for AI analysis.
        2. When a captcha is detected, the screenshot and status are
           returned so that it can be handled externally.
        3. Publishing is meant to be resumable once the captcha is solved.

      Request body (JSON object): the same fields as ``/publish``
      (``platform``, ``cookie``, ``title``, ``video_path``, ...) plus
          return_screenshot: whether to include the screenshot (default true)
          headless: per-request override of the headless setting (bool or
              the strings "true"/"false")
          user_id / publish_task_id / publish_account_id: optional ids,
              stored on the publisher as ints when convertible

      Response (JSON): ``success``, ``status``
      ("success|failed|need_captcha|processing", or "error" on a 500),
      ``page_url``, ``screenshot_base64`` (when requested and available),
      plus the other publish result fields.
      """
      # Print the request banner right away so it is always visible.
      print("\n" + "!" * 60, flush=True)
      print("!!! [AI-Assisted Publish] 收到请求 !!!", flush=True)
      print("!" * 60 + "\n", flush=True)

      def to_int_or_none(value):
          """Convert an optional id to int; anything unconvertible becomes None."""
          try:
              return int(value) if value is not None else None
          except Exception:
              return None

      try:
          # silent=True returns None for a missing/invalid body, so bad
          # input becomes a clear 400 instead of an AttributeError -> 500.
          data = request.get_json(silent=True)
          if not isinstance(data, dict):
              return jsonify({"success": False, "error": "请求体必须是 JSON 对象"}), 400

          print(f"[AI-Assisted Publish] 请求数据: platform={data.get('platform')}, title={data.get('title')}", flush=True)

          # Read parameters. ``or ""`` also covers an explicit JSON null.
          platform = str(data.get("platform") or "").lower()
          cookie_str = data.get("cookie", "")
          title = data.get("title", "")
          description = data.get("description", "")
          video_path = data.get("video_path", "")
          cover_path = data.get("cover_path")
          tags = data.get("tags", [])
          post_time = data.get("post_time")
          location = data.get("location", "重庆市")
          return_screenshot = data.get("return_screenshot", True)

          # Per-request headless override, used to switch to a visible
          # browser for captcha scenarios.
          headless = data.get("headless", HEADLESS_MODE)
          if isinstance(headless, str):
              headless = headless.lower() == 'true'

          # Parameter validation.
          if not platform:
              return jsonify({"success": False, "error": "缺少 platform 参数"}), 400
          if platform not in PLATFORM_MAP:
              return jsonify({"success": False, "error": f"不支持的平台: {platform}"}), 400
          if not cookie_str:
              return jsonify({"success": False, "error": "缺少 cookie 参数"}), 400
          if not title:
              return jsonify({"success": False, "error": "缺少 title 参数"}), 400
          if not video_path or not os.path.exists(video_path):
              return jsonify({"success": False, "error": f"视频文件不存在: {video_path}"}), 400
          # Same check as /publish: a directory must not pass as a video.
          if not os.path.isfile(video_path):
              return jsonify({"success": False, "error": f"路径不是文件: {video_path}"}), 400

          # Parse the scheduled publish time.
          publish_date = parse_datetime(post_time) if post_time else None
          if post_time and publish_date is None:
              # The request still proceeds as an immediate publish (the
              # existing behavior); the warning makes the dropped schedule
              # visible in the log.
              print(f"[AI Publish] 警告: 无法解析 post_time={post_time!r},将按立即发布处理", flush=True)

          # Build the publish parameters.
          params = PublishParams(
              title=title,
              video_path=video_path,
              description=description,
              cover_path=cover_path,
              tags=tags,
              publish_date=publish_date,
              location=location
          )

          print("=" * 60)
          print(f"[AI Publish] 平台: {platform}")
          print(f"[AI Publish] 标题: {title}")
          print(f"[AI Publish] 视频: {video_path}")
          print(f"[AI Publish] Headless: {headless}")
          print("=" * 60)

          # Instantiate the publisher with the per-request headless value.
          PublisherClass = get_publisher(platform)
          publisher = PublisherClass(headless=headless)

          proxy_payload = data.get('proxy')
          if isinstance(proxy_payload, dict) and proxy_payload.get('enabled'):
              provider = str(proxy_payload.get('provider') or 'shenlong').strip().lower()
              if provider == 'shenlong':
                  # Copy so the request data is not mutated when the task
                  # id is attached for the resolver.
                  proxy_payload_with_task = dict(proxy_payload)
                  if data.get('publish_task_id') is not None:
                      proxy_payload_with_task['publish_task_id'] = data.get('publish_task_id')
                  publisher.proxy_config = _resolve_shenlong_proxy(proxy_payload_with_task)

          # Optional ids attached to the publisher.
          publisher.user_id = to_int_or_none(data.get("user_id"))
          publisher.publish_task_id = to_int_or_none(data.get("publish_task_id"))
          publisher.publish_account_id = to_int_or_none(data.get("publish_account_id"))

          # Run the publish.
          result = asyncio.run(publisher.run(cookie_str, params))

          response_data = {
              "success": result.success,
              "platform": result.platform,
              "video_id": result.video_id,
              "video_url": result.video_url,
              "message": result.message,
              "error": result.error,
              "need_captcha": result.need_captcha,
              "captcha_type": result.captcha_type,
              # Derive a status from ``success`` when the publisher set none.
              "status": result.status or ("success" if result.success else "failed"),
              "page_url": result.page_url
          }

          # Include the screenshot only when requested and available.
          if return_screenshot and result.screenshot_base64:
              response_data["screenshot_base64"] = result.screenshot_base64

          return jsonify(response_data)
      except Exception as e:
          traceback.print_exc()
          return jsonify({"success": False, "error": str(e), "status": "error"}), 500
  1638. # ==================== 批量发布接口 ====================
  1639. @app.route("/publish/batch", methods=["POST"])
  1640. def publish_batch():
  1641. """
  1642. 批量发布接口 - 发布到多个平台
  1643. 请求体:
  1644. {
  1645. "platforms": ["douyin", "xiaohongshu"],
  1646. "cookies": {
  1647. "douyin": "cookie字符串",
  1648. "xiaohongshu": "cookie字符串"
  1649. },
  1650. "title": "视频标题",
  1651. "video_path": "视频文件绝对路径",
  1652. ...
  1653. }
  1654. """
  1655. try:
  1656. data = request.json
  1657. platforms = data.get("platforms", [])
  1658. cookies = data.get("cookies", {})
  1659. if not platforms:
  1660. return jsonify({"success": False, "error": "缺少 platforms 参数"}), 400
  1661. results = []
  1662. for platform in platforms:
  1663. platform = platform.lower()
  1664. cookie_str = cookies.get(platform, "")
  1665. if not cookie_str:
  1666. results.append({
  1667. "platform": platform,
  1668. "success": False,
  1669. "error": f"缺少 {platform} 的 cookie"
  1670. })
  1671. continue
  1672. try:
  1673. # 创建参数
  1674. params = PublishParams(
  1675. title=data.get("title", ""),
  1676. video_path=data.get("video_path", ""),
  1677. description=data.get("description", ""),
  1678. cover_path=data.get("cover_path"),
  1679. tags=data.get("tags", []),
  1680. publish_date=parse_datetime(data.get("post_time")),
  1681. location=data.get("location", "重庆市")
  1682. )
  1683. # 发布
  1684. PublisherClass = get_publisher(platform)
  1685. publisher = PublisherClass(headless=HEADLESS_MODE)
  1686. result = asyncio.run(publisher.run(cookie_str, params))
  1687. results.append({
  1688. "platform": result.platform,
  1689. "success": result.success,
  1690. "video_id": result.video_id,
  1691. "message": result.message,
  1692. "error": result.error
  1693. })
  1694. except Exception as e:
  1695. results.append({
  1696. "platform": platform,
  1697. "success": False,
  1698. "error": str(e)
  1699. })
  1700. # 统计成功/失败数量
  1701. success_count = sum(1 for r in results if r.get("success"))
  1702. return jsonify({
  1703. "success": success_count > 0,
  1704. "total": len(platforms),
  1705. "success_count": success_count,
  1706. "fail_count": len(platforms) - success_count,
  1707. "results": results
  1708. })
  1709. except Exception as e:
  1710. traceback.print_exc()
  1711. return jsonify({"success": False, "error": str(e)}), 500
  1712. # ==================== Cookie 验证接口 ====================
  1713. @app.route("/check_cookie", methods=["POST"])
  1714. def check_cookie():
  1715. """检查 cookie 是否有效"""
  1716. try:
  1717. data = request.json
  1718. platform = data.get("platform", "").lower()
  1719. cookie_str = data.get("cookie", "")
  1720. if not cookie_str:
  1721. return jsonify({"valid": False, "error": "缺少 cookie 参数"}), 400
  1722. # 目前只支持小红书的 cookie 验证
  1723. if platform == "xiaohongshu":
  1724. try:
  1725. from platforms.xiaohongshu import XiaohongshuPublisher, XHS_SDK_AVAILABLE
  1726. if XHS_SDK_AVAILABLE:
  1727. from xhs import XhsClient
  1728. publisher = XiaohongshuPublisher()
  1729. xhs_client = XhsClient(cookie_str, sign=publisher.sign_sync)
  1730. info = xhs_client.get_self_info()
  1731. if info:
  1732. return jsonify({
  1733. "valid": True,
  1734. "user_info": {
  1735. "user_id": info.get("user_id"),
  1736. "nickname": info.get("nickname"),
  1737. "avatar": info.get("images")
  1738. }
  1739. })
  1740. except Exception as e:
  1741. return jsonify({"valid": False, "error": str(e)})
  1742. # 其他平台返回格式正确但未验证
  1743. return jsonify({
  1744. "valid": True,
  1745. "message": "Cookie 格式正确,但未进行在线验证"
  1746. })
  1747. except Exception as e:
  1748. traceback.print_exc()
  1749. return jsonify({"valid": False, "error": str(e)})
  1750. # ==================== 获取作品列表接口 ====================
  1751. @app.route("/works", methods=["POST"])
  1752. def get_works():
  1753. """
  1754. 获取作品列表
  1755. 请求体:
  1756. {
  1757. "platform": "douyin", # douyin | xiaohongshu | kuaishou
  1758. "cookie": "cookie字符串或JSON",
  1759. "page": 0, # 页码(从0开始,可选,默认0)
  1760. "page_size": 20 # 每页数量(可选,默认20)
  1761. }
  1762. 响应:
  1763. {
  1764. "success": true,
  1765. "platform": "douyin",
  1766. "works": [...],
  1767. "total": 100,
  1768. "has_more": true
  1769. }
  1770. """
  1771. try:
  1772. data = request.json
  1773. platform = data.get("platform", "").lower()
  1774. cookie_str = data.get("cookie", "")
  1775. page = data.get("page", 0)
  1776. page_size = data.get("page_size", 20)
  1777. auto_paging = bool(data.get("auto_paging", False))
  1778. print(f"[Works] 收到请求: platform={platform}, page={page}, page_size={page_size}, auto_paging={auto_paging}", flush=True)
  1779. if not platform:
  1780. return jsonify({"success": False, "error": "缺少 platform 参数"}), 400
  1781. if platform not in PLATFORM_MAP:
  1782. return jsonify({
  1783. "success": False,
  1784. "error": f"不支持的平台: {platform},支持: {list(PLATFORM_MAP.keys())}"
  1785. }), 400
  1786. if not cookie_str:
  1787. return jsonify({"success": False, "error": "缺少 cookie 参数"}), 400
  1788. # 获取对应平台的发布器
  1789. PublisherClass = get_publisher(platform)
  1790. publisher = PublisherClass(headless=HEADLESS_MODE)
  1791. # 执行获取作品
  1792. if platform == "xiaohongshu" and auto_paging and hasattr(publisher, "get_all_works"):
  1793. result = asyncio.run(publisher.get_all_works(cookie_str))
  1794. else:
  1795. result = asyncio.run(publisher.run_get_works(cookie_str, page, page_size))
  1796. return jsonify(result.to_dict())
  1797. except Exception as e:
  1798. traceback.print_exc()
  1799. return jsonify({"success": False, "error": str(e)}), 500
  1800. # ==================== 保存作品日统计数据接口 ====================
  1801. @app.route("/work_day_statistics", methods=["POST"])
  1802. def save_work_day_statistics():
  1803. """
  1804. 保存作品每日统计数据
  1805. 当天的数据走更新流,日期变化走新增流
  1806. 请求体:
  1807. {
  1808. "statistics": [
  1809. {
  1810. "work_id": 1,
  1811. "fans_count": 1000,
  1812. "play_count": 5000,
  1813. "like_count": 200,
  1814. "comment_count": 50,
  1815. "share_count": 30,
  1816. "collect_count": 100
  1817. },
  1818. ...
  1819. ]
  1820. }
  1821. 响应:
  1822. {
  1823. "success": true,
  1824. "inserted": 5,
  1825. "updated": 3,
  1826. "message": "保存成功"
  1827. }
  1828. """
  1829. print("=" * 60, flush=True)
  1830. print("[DEBUG] ===== 进入 save_work_day_statistics 方法 =====", flush=True)
  1831. print(f"[DEBUG] 请求方法: {request.method}", flush=True)
  1832. print(f"[DEBUG] 请求数据: {request.json}", flush=True)
  1833. print("=" * 60, flush=True)
  1834. try:
  1835. data = request.json
  1836. statistics_list = data.get("statistics", [])
  1837. if not statistics_list:
  1838. return jsonify({"success": False, "error": "缺少 statistics 参数"}), 400
  1839. print(f"[WorkDayStatistics] 收到请求: {len(statistics_list)} 条统计数据")
  1840. # 调用 Node.js API 保存数据
  1841. result = call_nodejs_api('POST', '/work-day-statistics', {
  1842. 'statistics': statistics_list
  1843. })
  1844. print(f"[WorkDayStatistics] 完成: 新增 {result.get('inserted', 0)} 条, 更新 {result.get('updated', 0)} 条")
  1845. return jsonify(result)
  1846. except Exception as e:
  1847. traceback.print_exc()
  1848. return jsonify({"success": False, "error": str(e)}), 500
  1849. @app.route("/work_day_statistics/trend", methods=["GET"])
  1850. def get_statistics_trend():
  1851. """
  1852. 获取数据趋势(用于 Dashboard 数据看板 和 数据分析页面)
  1853. 查询参数:
  1854. user_id: 用户ID (必填)
  1855. days: 天数 (可选,默认7天,最大30天) - 与 start_date/end_date 二选一
  1856. start_date: 开始日期 (可选,格式 YYYY-MM-DD)
  1857. end_date: 结束日期 (可选,格式 YYYY-MM-DD)
  1858. account_id: 账号ID (可选,不填则查询所有账号)
  1859. 响应:
  1860. {
  1861. "success": true,
  1862. "data": {
  1863. "dates": ["01-16", "01-17", "01-18", ...],
  1864. "fans": [100, 120, 130, ...],
  1865. "views": [1000, 1200, 1500, ...],
  1866. "likes": [50, 60, 70, ...],
  1867. "comments": [10, 12, 15, ...],
  1868. "shares": [5, 6, 8, ...],
  1869. "collects": [20, 25, 30, ...]
  1870. }
  1871. }
  1872. """
  1873. try:
  1874. user_id = request.args.get("user_id")
  1875. days = request.args.get("days")
  1876. start_date = request.args.get("start_date")
  1877. end_date = request.args.get("end_date")
  1878. account_id = request.args.get("account_id")
  1879. if not user_id:
  1880. return jsonify({"success": False, "error": "缺少 user_id 参数"}), 400
  1881. # 调用 Node.js API 获取数据
  1882. params = {"user_id": user_id}
  1883. if days:
  1884. params["days"] = days
  1885. if start_date:
  1886. params["start_date"] = start_date
  1887. if end_date:
  1888. params["end_date"] = end_date
  1889. if account_id:
  1890. params["account_id"] = account_id
  1891. result = call_nodejs_api('GET', '/work-day-statistics/trend', params=params)
  1892. return jsonify(result)
  1893. except Exception as e:
  1894. traceback.print_exc()
  1895. return jsonify({"success": False, "error": str(e)}), 500
  1896. @app.route("/work_day_statistics/platforms", methods=["GET"])
  1897. def get_statistics_by_platform():
  1898. """
  1899. 按平台分组获取统计数据(用于数据分析页面的平台对比)
  1900. 数据来源:
  1901. - 粉丝数:从 platform_accounts 表获取(账号级别数据)
  1902. - 播放量/点赞/评论/收藏:从 work_day_statistics 表按平台汇总
  1903. - 粉丝增量:通过比较区间内最早和最新的粉丝数计算
  1904. 查询参数:
  1905. user_id: 用户ID (必填)
  1906. days: 天数 (可选,默认30天,最大30天) - 与 start_date/end_date 二选一
  1907. start_date: 开始日期 (可选,格式 YYYY-MM-DD)
  1908. end_date: 结束日期 (可选,格式 YYYY-MM-DD)
  1909. 响应:
  1910. {
  1911. "success": true,
  1912. "data": [
  1913. {
  1914. "platform": "douyin",
  1915. "fansCount": 1000,
  1916. "fansIncrease": 50,
  1917. "viewsCount": 5000,
  1918. "likesCount": 200,
  1919. "commentsCount": 30,
  1920. "collectsCount": 100
  1921. },
  1922. ...
  1923. ]
  1924. }
  1925. """
  1926. try:
  1927. user_id = request.args.get("user_id")
  1928. days = request.args.get("days")
  1929. start_date = request.args.get("start_date")
  1930. end_date = request.args.get("end_date")
  1931. if not user_id:
  1932. return jsonify({"success": False, "error": "缺少 user_id 参数"}), 400
  1933. # 调用 Node.js API 获取数据
  1934. params = {"user_id": user_id}
  1935. if days:
  1936. params["days"] = days
  1937. if start_date:
  1938. params["start_date"] = start_date
  1939. if end_date:
  1940. params["end_date"] = end_date
  1941. result = call_nodejs_api('GET', '/work-day-statistics/platforms', params=params)
  1942. print(f"[PlatformStats] 返回 {len(result.get('data', []))} 个平台的数据")
  1943. return jsonify(result)
  1944. except Exception as e:
  1945. traceback.print_exc()
  1946. return jsonify({"success": False, "error": str(e)}), 500
  1947. @app.route("/work_day_statistics/batch", methods=["POST"])
  1948. def get_work_statistics_history():
  1949. """
  1950. 批量获取作品的历史统计数据
  1951. 请求体:
  1952. {
  1953. "work_ids": [1, 2, 3],
  1954. "start_date": "2025-01-01", # 可选
  1955. "end_date": "2025-01-21" # 可选
  1956. }
  1957. 响应:
  1958. {
  1959. "success": true,
  1960. "data": {
  1961. "1": [
  1962. {"record_date": "2025-01-20", "play_count": 100, ...},
  1963. {"record_date": "2025-01-21", "play_count": 150, ...}
  1964. ],
  1965. ...
  1966. }
  1967. }
  1968. """
  1969. try:
  1970. data = request.json
  1971. work_ids = data.get("work_ids", [])
  1972. start_date = data.get("start_date")
  1973. end_date = data.get("end_date")
  1974. if not work_ids:
  1975. return jsonify({"success": False, "error": "缺少 work_ids 参数"}), 400
  1976. # 调用 Node.js API 获取数据
  1977. request_data = {"work_ids": work_ids}
  1978. if start_date:
  1979. request_data["start_date"] = start_date
  1980. if end_date:
  1981. request_data["end_date"] = end_date
  1982. result = call_nodejs_api('POST', '/work-day-statistics/batch', data=request_data)
  1983. return jsonify(result)
  1984. except Exception as e:
  1985. traceback.print_exc()
  1986. return jsonify({"success": False, "error": str(e)}), 500
  1987. @app.route("/work_day_statistics/overview", methods=["GET"])
  1988. def get_overview():
  1989. """
  1990. 获取数据总览(账号列表和汇总统计)
  1991. 查询参数:
  1992. user_id: 用户ID (必填)
  1993. 响应:
  1994. {
  1995. "success": true,
  1996. "data": {
  1997. "accounts": [
  1998. {
  1999. "id": 1,
  2000. "nickname": "账号名称",
  2001. "username": "账号ID",
  2002. "avatarUrl": "头像URL",
  2003. "platform": "douyin",
  2004. "groupId": 1,
  2005. "fansCount": 1000,
  2006. "totalIncome": null,
  2007. "yesterdayIncome": null,
  2008. "totalViews": 5000,
  2009. "yesterdayViews": 100,
  2010. "yesterdayComments": 10,
  2011. "yesterdayLikes": 50,
  2012. "yesterdayFansIncrease": 5,
  2013. "updateTime": "2025-01-26T10:00:00Z",
  2014. "status": "active"
  2015. },
  2016. ...
  2017. ],
  2018. "summary": {
  2019. "totalAccounts": 5,
  2020. "totalIncome": 0,
  2021. "yesterdayIncome": 0,
  2022. "totalViews": 10000,
  2023. "yesterdayViews": 200,
  2024. "totalFans": 5000,
  2025. "yesterdayComments": 20,
  2026. "yesterdayLikes": 100,
  2027. "yesterdayFansIncrease": 10
  2028. }
  2029. }
  2030. }
  2031. """
  2032. try:
  2033. user_id = request.args.get("user_id")
  2034. if not user_id:
  2035. return jsonify({"success": False, "error": "缺少 user_id 参数"}), 400
  2036. # 调用 Node.js API 获取数据
  2037. params = {"user_id": user_id}
  2038. result = call_nodejs_api('GET', '/work-day-statistics/overview', params=params)
  2039. return jsonify(result)
  2040. except NodeApiError as e:
  2041. # 透传 Node 的真实状态码/错误内容,避免所有错误都变成 500
  2042. return jsonify(e.payload), e.status_code
  2043. except Exception as e:
  2044. traceback.print_exc()
  2045. return jsonify({"success": False, "error": str(e)}), 500
  2046. # ==================== 获取评论列表接口 ====================
  2047. @app.route("/comments", methods=["POST"])
  2048. def get_comments():
  2049. """
  2050. 获取作品评论
  2051. 请求体:
  2052. {
  2053. "platform": "douyin", # douyin | xiaohongshu | kuaishou
  2054. "cookie": "cookie字符串或JSON",
  2055. "work_id": "作品ID",
  2056. "cursor": "" # 分页游标(可选)
  2057. }
  2058. 响应:
  2059. {
  2060. "success": true,
  2061. "platform": "douyin",
  2062. "work_id": "xxx",
  2063. "comments": [...],
  2064. "total": 50,
  2065. "has_more": true,
  2066. "cursor": "xxx"
  2067. }
  2068. """
  2069. try:
  2070. data = request.json
  2071. platform = data.get("platform", "").lower()
  2072. cookie_str = data.get("cookie", "")
  2073. work_id = data.get("work_id", "")
  2074. cursor = data.get("cursor", "")
  2075. print(f"[Comments] 收到请求: platform={platform}, work_id={work_id}")
  2076. if not platform:
  2077. return jsonify({"success": False, "error": "缺少 platform 参数"}), 400
  2078. if platform not in PLATFORM_MAP:
  2079. return jsonify({
  2080. "success": False,
  2081. "error": f"不支持的平台: {platform},支持: {list(PLATFORM_MAP.keys())}"
  2082. }), 400
  2083. if not cookie_str:
  2084. return jsonify({"success": False, "error": "缺少 cookie 参数"}), 400
  2085. if not work_id:
  2086. return jsonify({"success": False, "error": "缺少 work_id 参数"}), 400
  2087. # 获取对应平台的发布器
  2088. PublisherClass = get_publisher(platform)
  2089. publisher = PublisherClass(headless=HEADLESS_MODE)
  2090. # 执行获取评论
  2091. result = asyncio.run(publisher.run_get_comments(cookie_str, work_id, cursor))
  2092. result_dict = result.to_dict()
  2093. # 添加 cursor 到响应
  2094. if hasattr(result, '__dict__') and 'cursor' in result.__dict__:
  2095. result_dict['cursor'] = result.__dict__['cursor']
  2096. return jsonify(result_dict)
  2097. except Exception as e:
  2098. traceback.print_exc()
  2099. return jsonify({"success": False, "error": str(e)}), 500
  2100. # ==================== 获取所有作品评论接口 ====================
  2101. @app.route("/all_comments", methods=["POST"])
  2102. def get_all_comments():
  2103. """
  2104. 获取所有作品的评论(一次性获取)
  2105. 请求体:
  2106. {
  2107. "platform": "douyin", # douyin | xiaohongshu
  2108. "cookie": "cookie字符串或JSON"
  2109. }
  2110. 响应:
  2111. {
  2112. "success": true,
  2113. "platform": "douyin",
  2114. "work_comments": [
  2115. {
  2116. "work_id": "xxx",
  2117. "title": "作品标题",
  2118. "cover_url": "封面URL",
  2119. "comments": [...]
  2120. }
  2121. ],
  2122. "total": 5
  2123. }
  2124. """
  2125. try:
  2126. data = request.json
  2127. platform = data.get("platform", "").lower()
  2128. cookie_str = data.get("cookie", "")
  2129. print(f"[AllComments] 收到请求: platform={platform}")
  2130. if not platform:
  2131. return jsonify({"success": False, "error": "缺少 platform 参数"}), 400
  2132. if platform not in ['douyin', 'xiaohongshu']:
  2133. return jsonify({
  2134. "success": False,
  2135. "error": f"该接口只支持 douyin 和 xiaohongshu 平台"
  2136. }), 400
  2137. if not cookie_str:
  2138. return jsonify({"success": False, "error": "缺少 cookie 参数"}), 400
  2139. # 获取对应平台的发布器
  2140. PublisherClass = get_publisher(platform)
  2141. publisher = PublisherClass(headless=HEADLESS_MODE)
  2142. # 执行获取所有评论
  2143. result = asyncio.run(publisher.get_all_comments(cookie_str))
  2144. return jsonify(result)
  2145. except Exception as e:
  2146. traceback.print_exc()
  2147. return jsonify({"success": False, "error": str(e)}), 500
  2148. # ==================== 登录状态检查接口 ====================
  2149. @app.route("/check_login", methods=["POST"])
  2150. def check_login():
  2151. """
  2152. 检查 Cookie 登录状态(通过浏览器访问后台页面检测)
  2153. 请求体:
  2154. {
  2155. "platform": "douyin", # douyin | xiaohongshu | kuaishou | weixin
  2156. "cookie": "cookie字符串或JSON"
  2157. }
  2158. 响应:
  2159. {
  2160. "success": true,
  2161. "valid": true, # Cookie 是否有效
  2162. "need_login": false, # 是否需要重新登录
  2163. "message": "登录状态有效"
  2164. }
  2165. """
  2166. try:
  2167. data = request.json
  2168. platform = data.get("platform", "").lower()
  2169. cookie_str = data.get("cookie", "")
  2170. print(f"[CheckLogin] 收到请求: platform={platform}")
  2171. if not platform:
  2172. return jsonify({"success": False, "error": "缺少 platform 参数"}), 400
  2173. if platform not in PLATFORM_MAP:
  2174. return jsonify({
  2175. "success": False,
  2176. "error": f"不支持的平台: {platform},支持: {list(PLATFORM_MAP.keys())}"
  2177. }), 400
  2178. if not cookie_str:
  2179. return jsonify({"success": False, "error": "缺少 cookie 参数"}), 400
  2180. # 获取对应平台的发布器
  2181. PublisherClass = get_publisher(platform)
  2182. publisher = PublisherClass(headless=HEADLESS_MODE)
  2183. # 执行登录检查
  2184. result = asyncio.run(publisher.check_login_status(cookie_str))
  2185. return jsonify(result)
  2186. except Exception as e:
  2187. traceback.print_exc()
  2188. return jsonify({
  2189. "success": False,
  2190. "valid": False,
  2191. "need_login": True,
  2192. "error": str(e)
  2193. }), 500
  2194. # ==================== 获取账号信息接口 ====================
  2195. @app.route("/account_info", methods=["POST"])
  2196. def get_account_info():
  2197. """
  2198. 获取账号信息
  2199. 请求体:
  2200. {
  2201. "platform": "baijiahao", # 平台
  2202. "cookie": "cookie字符串或JSON"
  2203. }
  2204. 响应:
  2205. {
  2206. "success": true,
  2207. "account_id": "xxx",
  2208. "account_name": "用户名",
  2209. "avatar_url": "头像URL",
  2210. "fans_count": 0,
  2211. "works_count": 0
  2212. }
  2213. """
  2214. try:
  2215. data = request.json
  2216. platform = data.get("platform", "").lower()
  2217. cookie_str = data.get("cookie", "")
  2218. print(f"[AccountInfo] 收到请求: platform={platform}")
  2219. if not platform:
  2220. return jsonify({"success": False, "error": "缺少 platform 参数"}), 400
  2221. if platform not in PLATFORM_MAP:
  2222. return jsonify({
  2223. "success": False,
  2224. "error": f"不支持的平台: {platform},支持: {list(PLATFORM_MAP.keys())}"
  2225. }), 400
  2226. if not cookie_str:
  2227. return jsonify({"success": False, "error": "缺少 cookie 参数"}), 400
  2228. # 获取对应平台的发布器
  2229. PublisherClass = get_publisher(platform)
  2230. publisher = PublisherClass(headless=HEADLESS_MODE)
  2231. # 检查是否有 get_account_info 方法
  2232. if hasattr(publisher, 'get_account_info'):
  2233. result = asyncio.run(publisher.get_account_info(cookie_str))
  2234. return jsonify(result)
  2235. else:
  2236. return jsonify({
  2237. "success": False,
  2238. "error": f"平台 {platform} 不支持获取账号信息"
  2239. }), 400
  2240. except Exception as e:
  2241. traceback.print_exc()
  2242. return jsonify({"success": False, "error": str(e)}), 500
  2243. # ==================== 健康检查 ====================
  2244. @app.route("/health", methods=["GET"])
  2245. def health_check():
  2246. """健康检查"""
  2247. # 检查 xhs SDK 是否可用
  2248. xhs_available = False
  2249. try:
  2250. from platforms.xiaohongshu import XHS_SDK_AVAILABLE
  2251. xhs_available = XHS_SDK_AVAILABLE
  2252. except:
  2253. pass
  2254. return jsonify({
  2255. "status": "ok",
  2256. "xhs_sdk": xhs_available,
  2257. "supported_platforms": list(PLATFORM_MAP.keys()),
  2258. "headless_mode": HEADLESS_MODE
  2259. })
  2260. @app.route("/", methods=["GET"])
  2261. def index():
  2262. """首页"""
  2263. return jsonify({
  2264. "name": "智媒通视频发布服务",
  2265. "version": "1.2.0",
  2266. "endpoints": {
  2267. "GET /": "服务信息",
  2268. "GET /health": "健康检查",
  2269. "POST /publish": "发布视频",
  2270. "POST /publish/batch": "批量发布",
  2271. "POST /works": "获取作品列表",
  2272. "POST /comments": "获取作品评论",
  2273. "POST /all_comments": "获取所有作品评论",
  2274. "POST /work_day_statistics": "保存作品每日统计数据",
  2275. "POST /work_day_statistics/batch": "获取作品历史统计数据",
  2276. "POST /check_cookie": "检查 Cookie",
  2277. "POST /sign": "小红书签名"
  2278. },
  2279. "supported_platforms": list(PLATFORM_MAP.keys())
  2280. })
  2281. # ==================== 命令行启动 ====================
  2282. def main():
  2283. parser = argparse.ArgumentParser(description='智媒通视频发布服务')
  2284. parser.add_argument('--port', type=int, default=5005, help='服务端口 (默认: 5005)')
  2285. # 从环境变量读取 HOST,默认仅本地访问
  2286. default_host = os.environ.get('PYTHON_HOST', os.environ.get('HOST', '127.0.0.1'))
  2287. parser.add_argument('--host', type=str, default=default_host, help='监听地址 (默认: 127.0.0.1,可通过 HOST 环境变量配置)')
  2288. parser.add_argument('--headless', type=str, default='true', help='是否无头模式 (默认: true)')
  2289. parser.add_argument('--debug', action='store_true', help='调试模式')
  2290. args = parser.parse_args()
  2291. global HEADLESS_MODE
  2292. HEADLESS_MODE = args.headless.lower() == 'true'
  2293. # 检查 xhs SDK
  2294. xhs_status = "未安装"
  2295. try:
  2296. from platforms.xiaohongshu import XHS_SDK_AVAILABLE
  2297. xhs_status = "已安装" if XHS_SDK_AVAILABLE else "未安装"
  2298. except:
  2299. pass
  2300. print("=" * 60)
  2301. print("智媒通视频发布服务")
  2302. print("=" * 60)
  2303. print(f"XHS SDK: {xhs_status}")
  2304. print(f"Headless 模式: {HEADLESS_MODE}")
  2305. print(f"支持平台: {', '.join(PLATFORM_MAP.keys())}")
  2306. print("=" * 60)
  2307. print(f"启动服务: http://{args.host}:{args.port}")
  2308. print("=" * 60)
  2309. app.run(host=args.host, port=args.port, debug=bool(args.debug), threaded=True, use_reloader=False)
  2310. @app.route('/auto-reply', methods=['POST'])
  2311. def auto_reply():
  2312. """
  2313. 微信视频号自动回复私信
  2314. POST /auto-reply
  2315. Body: {
  2316. "platform": "weixin",
  2317. "cookie": "..."
  2318. }
  2319. """
  2320. try:
  2321. data = request.json
  2322. platform = data.get('platform', '').lower()
  2323. cookie = data.get('cookie', '')
  2324. if platform != 'weixin':
  2325. return jsonify({
  2326. 'success': False,
  2327. 'error': '只支持微信视频号平台'
  2328. }), 400
  2329. if not cookie:
  2330. return jsonify({
  2331. 'success': False,
  2332. 'error': '缺少 Cookie'
  2333. }), 400
  2334. print(f"[API] 接收自动回复请求: platform={platform}")
  2335. # 创建 Publisher 实例
  2336. publisher = WeixinPublisher(headless=HEADLESS_MODE)
  2337. # 执行自动回复
  2338. loop = asyncio.new_event_loop()
  2339. asyncio.set_event_loop(loop)
  2340. result = loop.run_until_complete(publisher.auto_reply_private_messages(cookie))
  2341. loop.close()
  2342. print(f"[API] 自动回复结果: {result}")
  2343. return jsonify(result)
  2344. except Exception as e:
  2345. print(f"[API] 自动回复异常: {e}")
  2346. traceback.print_exc()
  2347. return jsonify({
  2348. 'success': False,
  2349. 'error': str(e)
  2350. }), 500
# Script entry point: start the service only when this file is run directly.
if __name__ == '__main__':
    main()
  2353. ================================================================================
  2354. 文件: server\python\platforms\weixin.py
  2355. ================================================================================
  2356. # -*- coding: utf-8 -*-
  2357. """
  2358. 微信视频号发布器
  2359. 参考: matrix/tencent_uploader/main.py
  2360. """
  2361. import asyncio
  2362. import json
  2363. import os
  2364. from datetime import datetime
  2365. from typing import List
  2366. from .base import (
  2367. BasePublisher, PublishParams, PublishResult,
  2368. WorkItem, WorksResult, CommentItem, CommentsResult
  2369. )
  2370. import os
  2371. import time
# Lets the selector of the "upload video" entry point be specified manually
# through an environment variable, so it can be adjusted quickly when the
# page structure changes. Surrounding whitespace is stripped; empty when unset.
WEIXIN_UPLOAD_SELECTOR = os.environ.get("WEIXIN_UPLOAD_SELECTOR", "").strip()
  2374. def format_short_title(origin_title: str) -> str:
  2375. """
  2376. 格式化短标题
  2377. - 移除特殊字符
  2378. - 长度限制在 6-16 字符
  2379. """
  2380. allowed_special_chars = "《》"":+?%°"
  2381. filtered_chars = [
  2382. char if char.isalnum() or char in allowed_special_chars
  2383. else ' ' if char == ',' else ''
  2384. for char in origin_title
  2385. ]
  2386. formatted_string = ''.join(filtered_chars)
  2387. if len(formatted_string) > 16:
  2388. formatted_string = formatted_string[:16]
  2389. elif len(formatted_string) < 6:
  2390. formatted_string += ' ' * (6 - len(formatted_string))
  2391. return formatted_string
  2392. class WeixinPublisher(BasePublisher):
  2393. """
  2394. 微信视频号发布器
  2395. 使用 Playwright 自动化操作视频号创作者中心
  2396. 注意: 需要使用 Chrome 浏览器,否则可能出现 H264 编码错误
  2397. """
  2398. platform_name = "weixin"
  2399. login_url = "https://channels.weixin.qq.com/platform"
  2400. publish_url = "https://channels.weixin.qq.com/platform/post/create"
  2401. cookie_domain = ".weixin.qq.com"
  2402. def _parse_count(self, count_str: str) -> int:
  2403. """解析数字(支持带'万'的格式)"""
  2404. try:
  2405. count_str = count_str.strip()
  2406. if '万' in count_str:
  2407. return int(float(count_str.replace('万', '')) * 10000)
  2408. return int(count_str)
  2409. except:
  2410. return 0
  2411. async def ai_find_upload_selector(self, frame_html: str, frame_name: str = "main") -> str:
  2412. """
  2413. 使用 AI 从 HTML 中识别“上传视频/选择文件”相关元素的 CSS 选择器。
  2414. 设计思路:
  2415. - 仅在常规 DOM 选择器都失败时调用,避免频繁占用 AI 配额;
  2416. - 通过 DashScope 文本模型(与验证码识别同一套配置)分析 HTML;
  2417. - 返回一个适合用于 frame.locator(selector) 的 CSS 选择器。
  2418. """
  2419. import json
  2420. import re
  2421. import requests
  2422. import os
  2423. # 避免 HTML 过长导致 token 超限,只截取前 N 字符
  2424. if not frame_html:
  2425. return ""
  2426. max_len = 20000
  2427. if len(frame_html) > max_len:
  2428. frame_html = frame_html[:max_len]
  2429. ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
  2430. ai_base_url = os.environ.get("DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1")
  2431. ai_text_model = os.environ.get("AI_TEXT_MODEL", "qwen-plus")
  2432. if not ai_api_key:
  2433. print(f"[{self.platform_name}] AI上传入口识别: 未配置 AI API Key,跳过")
  2434. return ""
  2435. prompt = f"""
  2436. 你是熟悉微信视频号后台的前端工程师,现在需要在一段 HTML 中找到“上传视频文件”的入口。
  2437. 页面说明:
  2438. - 平台:微信视频号(channels.weixin.qq.com)
  2439. - 目标:用于上传视频文件的按钮或 input(一般会触发文件选择框)
  2440. - 你会收到某个 frame 的完整 HTML 片段(不包含截图)。
  2441. 请你根据下面的 HTML,推断最适合用于上传视频文件的元素,并输出一个可以被 Playwright 使用的 CSS 选择器。
  2442. 要求:
  2443. 1. 只考虑“上传/选择视频文件”的入口,不要返回“发布/发表/下一步”等按钮;
  2444. 2. 选择器需要尽量稳定,不要使用自动生成的随机类名(例如带很多随机字母/数字的类名可以用前缀匹配);
  2445. 3. 选择器必须是 CSS 选择器(不要返回 XPath);
  2446. 4. 如果确实找不到合理的上传入口,返回 selector 为空字符串。
  2447. 请以 JSON 格式输出,严格遵守以下结构(不要添加任何解释文字):
  2448. ```json
  2449. {{
  2450. "selector": "CSS 选择器字符串,比如:input[type='file'] 或 div.upload-content input[type='file']"
  2451. }}
  2452. ```
  2453. 下面是 frame=\"{frame_name}\" 的 HTML:
  2454. ```html
  2455. {frame_html}
  2456. ```"""
  2457. payload = {
  2458. "model": ai_text_model,
  2459. "messages": [
  2460. {
  2461. "role": "user",
  2462. "content": prompt,
  2463. }
  2464. ],
  2465. "max_tokens": 600,
  2466. }
  2467. headers = {
  2468. "Authorization": f"Bearer {ai_api_key}",
  2469. "Content-Type": "application/json",
  2470. }
  2471. try:
  2472. print(f"[{self.platform_name}] AI上传入口识别: 正在分析 frame={frame_name} HTML...")
  2473. resp = requests.post(
  2474. f"{ai_base_url}/chat/completions",
  2475. headers=headers,
  2476. json=payload,
  2477. timeout=40,
  2478. )
  2479. if resp.status_code != 200:
  2480. print(f"[{self.platform_name}] AI上传入口识别: API 返回错误 {resp.status_code}")
  2481. return ""
  2482. data = resp.json()
  2483. content = data.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
  2484. # 尝试从 ```json``` 代码块中解析
  2485. json_match = re.search(r"```json\\s*([\\s\\S]*?)\\s*```", content)
  2486. if json_match:
  2487. json_str = json_match.group(1)
  2488. else:
  2489. json_match = re.search(r"\\{[\\s\\S]*\\}", content)
  2490. json_str = json_match.group(0) if json_match else "{}"
  2491. try:
  2492. result = json.loads(json_str)
  2493. except Exception:
  2494. result = {}
  2495. selector = (result.get("selector") or "").strip()
  2496. print(f"[{self.platform_name}] AI上传入口识别结果: selector='{selector}'")
  2497. return selector
  2498. except Exception as e:
  2499. print(f"[{self.platform_name}] AI上传入口识别异常: {e}")
  2500. return ""
  2501. async def ai_pick_selector_from_candidates(self, candidates: list, goal: str, frame_name: str = "main") -> str:
  2502. """
  2503. 将“候选元素列表(包含 css selector + 文本/属性)”发给 AI,让 AI 直接挑选最符合 goal 的元素。
  2504. 适用于:HTML 里看不出上传入口、或页面大量动态渲染时。
  2505. """
  2506. import json
  2507. import re
  2508. import requests
  2509. import os
  2510. if not candidates:
  2511. return ""
  2512. ai_api_key = os.environ.get("DASHSCOPE_API_KEY", "")
  2513. ai_base_url = os.environ.get("DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1")
  2514. ai_text_model = os.environ.get("AI_TEXT_MODEL", "qwen-plus")
  2515. if not ai_api_key:
  2516. print(f"[{self.platform_name}] AI候选选择器: 未配置 AI API Key,跳过")
  2517. return ""
  2518. # 控制长度,最多取前 120 个候选
  2519. candidates = candidates[:120]
  2520. prompt = f"""
  2521. 你是自动化发布工程师。现在要在微信视频号(channels.weixin.qq.com)发布页面里找到“{goal}”相关的入口元素。
  2522. 我会给你一组候选元素,每个候选都包含:
  2523. - css: 可直接用于 Playwright 的 CSS 选择器
  2524. - tag / type / role / ariaLabel / text / id / className(部分字段可能为空)
  2525. 你的任务:
  2526. - 从候选中选出最可能用于“{goal}”的元素,返回它的 css 选择器;
  2527. - 如果没有任何候选符合,返回空字符串。
  2528. 注意:
  2529. - 如果 goal 是“上传视频入口”,优先选择 input[type=file] 或看起来会触发选择文件/上传的区域;
  2530. - 不要选择“发布/发表/下一步”等按钮(除非 goal 明确是发布按钮)。
  2531. 请严格按 JSON 输出(不要解释):
  2532. ```json
  2533. {{ "selector": "..." }}
  2534. ```
  2535. 候选列表(frame={frame_name}):
  2536. ```json
  2537. {json.dumps(candidates, ensure_ascii=False)}
  2538. ```"""
  2539. payload = {
  2540. "model": ai_text_model,
  2541. "messages": [{"role": "user", "content": prompt}],
  2542. "max_tokens": 400,
  2543. }
  2544. headers = {
  2545. "Authorization": f"Bearer {ai_api_key}",
  2546. "Content-Type": "application/json",
  2547. }
  2548. try:
  2549. print(f"[{self.platform_name}] AI候选选择器: 正在分析 frame={frame_name}, goal={goal} ...")
  2550. resp = requests.post(
  2551. f"{ai_base_url}/chat/completions",
  2552. headers=headers,
  2553. json=payload,
  2554. timeout=40,
  2555. )
  2556. if resp.status_code != 200:
  2557. print(f"[{self.platform_name}] AI候选选择器: API 返回错误 {resp.status_code}")
  2558. return ""
  2559. data = resp.json()
  2560. content = data.get("choices", [{}])[0].get("message", {}).get("content", "") or ""
  2561. json_match = re.search(r"```json\\s*([\\s\\S]*?)\\s*```", content)
  2562. if json_match:
  2563. json_str = json_match.group(1)
  2564. else:
  2565. json_match = re.search(r"\\{[\\s\\S]*\\}", content)
  2566. json_str = json_match.group(0) if json_match else "{}"
  2567. try:
  2568. result = json.loads(json_str)
  2569. except Exception:
  2570. result = {}
  2571. selector = (result.get("selector") or "").strip()
  2572. print(f"[{self.platform_name}] AI候选选择器结果: selector='{selector}'")
  2573. return selector
  2574. except Exception as e:
  2575. print(f"[{self.platform_name}] AI候选选择器异常: {e}")
  2576. return ""
  2577. async def _extract_relevant_html_snippets(self, html: str) -> str:
  2578. """
  2579. 从 HTML 中抽取与上传相关的片段,减少 token,提升 AI 命中率。
  2580. - 优先抓取包含 upload/上传/file/input 等关键词的窗口片段
  2581. - 若未命中关键词,返回“开头 + 结尾”的拼接
  2582. """
  2583. import re
  2584. if not html:
  2585. return ""
  2586. patterns = [
  2587. r"upload",
  2588. r"uploader",
  2589. r"file",
  2590. r"type\\s*=\\s*['\\\"]file['\\\"]",
  2591. r"input",
  2592. r"drag",
  2593. r"drop",
  2594. r"选择",
  2595. r"上传",
  2596. r"添加",
  2597. r"视频",
  2598. ]
  2599. regex = re.compile("|".join(patterns), re.IGNORECASE)
  2600. snippets = []
  2601. for m in regex.finditer(html):
  2602. start = max(0, m.start() - 350)
  2603. end = min(len(html), m.end() + 350)
  2604. snippets.append(html[start:end])
  2605. if len(snippets) >= 18:
  2606. break
  2607. if snippets:
  2608. # 去重(粗略)
  2609. unique = []
  2610. seen = set()
  2611. for s in snippets:
  2612. key = hash(s)
  2613. if key not in seen:
  2614. seen.add(key)
  2615. unique.append(s)
  2616. return "\n\n<!-- SNIPPET -->\n\n".join(unique)[:20000]
  2617. # fallback: head + tail
  2618. head = html[:9000]
  2619. tail = html[-9000:] if len(html) > 9000 else ""
  2620. return (head + "\n\n<!-- TAIL -->\n\n" + tail)[:20000]
  async def init_browser(self, storage_state: str = None):
      """Launch a browser, create a context and open a fresh page.

      The system Chrome channel is tried first (the original note says this
      avoids H264 encoding errors); if that launch fails, the bundled
      Chromium is launched instead.  The browser, context and page are
      stored on ``self.browser``, ``self.context`` and ``self.page``.

      Args:
          storage_state: Optional Playwright storage state to preload into
              the new context.  Ignored when empty.

      Returns:
          The newly created page.
      """
      from playwright.async_api import async_playwright

      playwright = await async_playwright().start()
      # Keep the driver handle so teardown code is able to stop it later;
      # as a bare local it would be unreachable after this method returns.
      self._playwright = playwright

      proxy = self.proxy_config if isinstance(getattr(self, 'proxy_config', None), dict) else None
      if proxy and proxy.get('server'):
          print(f"[{self.platform_name}] 使用代理: {proxy.get('server')}", flush=True)
      launch_proxy = proxy if proxy and proxy.get('server') else None

      # Prefer the installed Chrome; fall back to default Chromium if it is missing
      try:
          self.browser = await playwright.chromium.launch(
              headless=self.headless,
              channel="chrome",  # use the system Chrome
              proxy=launch_proxy
          )
          print(f"[{self.platform_name}] 使用系统 Chrome 浏览器")
      except Exception as e:
          print(f"[{self.platform_name}] Chrome 不可用,使用 Chromium: {e}")
          self.browser = await playwright.chromium.launch(
              headless=self.headless,
              proxy=launch_proxy
          )

      # Extra HTTP headers intended to prevent redirects.
      # NOTE(review): this User-Agent (macOS) differs from the context
      # user_agent below (Windows) — confirm the mismatch is intended.
      headers = {
          "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
          "Referer": "https://channels.weixin.qq.com/platform/post/list",
      }
      context_options = {
          "extra_http_headers": headers,
          "ignore_https_errors": True,
          "viewport": {"width": 1920, "height": 1080},
          "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
      }
      # Honour the caller-supplied storage state instead of silently dropping it
      if storage_state:
          context_options["storage_state"] = storage_state

      self.context = await self.browser.new_context(**context_options)
      self.page = await self.context.new_page()
      return self.page
  async def set_schedule_time(self, publish_date: datetime):
      """Fill in the scheduled-publish date and hour on the publish form.

      Returns immediately when no page is open.  Only the hour of
      ``publish_date`` is typed into the time field.
      """
      page = self.page
      if not page:
          return

      print(f"[{self.platform_name}] 设置定时发布...")

      # Switch the form to scheduled mode (second label containing the text)
      await page.locator("label").filter(has_text="定时").nth(1).click()

      # Open the date picker
      await page.click('input[placeholder="请选择发表时间"]')

      # Move the picker forward once when its month label is not the target month
      expected_label = f"{publish_date.month:02d}月"
      shown_label = await page.inner_text('span.weui-desktop-picker__panel__label:has-text("月")')
      if shown_label != expected_label:
          await page.click('button.weui-desktop-btn__icon__right')

      # Click the first enabled day cell whose text equals the target day
      wanted_day = str(publish_date.day)
      for cell in await page.query_selector_all('table.weui-desktop-picker__table a'):
          cell_classes = await cell.evaluate('el => el.className')
          if 'weui-desktop-picker__disabled' not in cell_classes:
              cell_text = await cell.inner_text()
              if cell_text.strip() == wanted_day:
                  await cell.click()
                  break

      # Replace the content of the time field with the target hour
      await page.click('input[placeholder="请选择时间"]')
      keyboard = page.keyboard
      await keyboard.press("Control+KeyA")
      await keyboard.type(str(publish_date.hour))

      # Click elsewhere so the picker commits the value
      await page.locator("div.input-editor").click()
  async def handle_upload_error(self, video_path: str):
      """Delete the failed upload and submit the video file again.

      Returns immediately when no page is open.
      """
      page = self.page
      if not page:
          return

      print(f"[{self.platform_name}] 视频出错了,重新上传中...")

      # Remove the failed media entry, then confirm the deletion dialog
      delete_tag = page.locator('div.media-status-content div.tag-inner:has-text("删除")')
      await delete_tag.click()
      confirm_button = page.get_by_role('button', name="删除", exact=True)
      await confirm_button.click()

      # Hand the same file to the file input once more
      await page.locator('input[type="file"]').set_input_files(video_path)
  async def add_title_tags(self, params: PublishParams):
      """Type the title and the hashtag topics into the description editor.

      The editor is focused by clicking it, the title is typed, and when
      tags are present each one is typed as ``#tag`` followed by a space
      on a new line.  Returns immediately when no page is open.

      Args:
          params: Publish parameters; ``title`` and ``tags`` are read.
      """
      if not self.page:
          return

      await self.page.locator("div.input-editor").click()
      await self.page.keyboard.type(params.title)

      if params.tags:
          await self.page.keyboard.press("Enter")
          for tag in params.tags:
              await self.page.keyboard.type("#" + tag)
              await self.page.keyboard.press("Space")

      # Count defensively so the summary line cannot raise when tags is None
      tag_count = len(params.tags) if params.tags else 0
      print(f"[{self.platform_name}] 成功添加标题和 {tag_count} 个话题")
  async def add_short_title(self):
      """Probe for the short-title input on the publish form.

      This method only locates the field and checks that it exists; it does
      not type anything into it.  Returns immediately when no page is open.
      """
      if not self.page:
          return
      try:
          short_title_element = self.page.get_by_text("短标题", exact=True).locator("..").locator(
              "xpath=following-sibling::div").locator('span input[type="text"]')
          if await short_title_element.count():
              # Placeholder kept from the original: existing content would
              # be used as the short title here.
              pass
      except Exception:
          # Best-effort probe.  Catch Exception rather than using a bare
          # except so task cancellation and KeyboardInterrupt still propagate.
          pass
  async def upload_cover(self, cover_path: str):
      """Replace the video cover with the image at ``cover_path``.

      Nothing happens when no page is open, when ``cover_path`` is empty or
      does not exist on disk, or when the change-cover button is disabled.
      Any failure during the flow is logged and swallowed so that a cover
      problem does not abort the caller.

      Args:
          cover_path: Local path of the cover image.
      """
      if not self.page or not cover_path or not os.path.exists(cover_path):
          return

      try:
          await asyncio.sleep(2)

          change_cover_btn = self.page.locator('div.finder-tag-wrap.btn:has-text("更换封面")')
          # get_attribute returns None when the element has no class
          # attribute; treat that as "not disabled" instead of raising.
          btn_classes = (await change_cover_btn.get_attribute('class')) or ""
          if "disabled" in btn_classes:
              return

          await change_cover_btn.click()
          cover_slot = self.page.locator('div.single-cover-uploader-wrap > div.wrap')
          await cover_slot.hover()

          # Delete the existing cover, if any
          delete_icon = self.page.locator(".del-wrap > .svg-icon")
          if await delete_icon.count():
              await delete_icon.click()

          # Upload the new cover through the native file chooser
          async with self.page.expect_file_chooser() as fc_info:
              await cover_slot.click()
          preview_chooser = await fc_info.value
          await preview_chooser.set_files(cover_path)

          # Two confirmation dialogs follow the upload
          await asyncio.sleep(2)
          await self.page.get_by_role("button", name="确定").click()
          await asyncio.sleep(1)
          await self.page.get_by_role("button", name="确认").click()
          print(f"[{self.platform_name}] 封面上传成功")
      except Exception as e:
          print(f"[{self.platform_name}] 封面上传失败: {e}")
  async def check_captcha(self) -> dict:
      """Check the current page for captcha or login prompts via selectors.

      Captcha selectors are probed first, then login-dialog selectors; the
      first selector with at least one match decides the result.

      Returns:
          ``{'need_captcha': bool, 'captcha_type': str}`` where the type is
          ``'image'`` for a captcha hit, ``'login'`` for a login prompt and
          ``''`` when nothing was found or no page is open.
      """
      if not self.page:
          return {'need_captcha': False, 'captcha_type': ''}

      captcha_selectors = [
          'text="请输入验证码"',
          'text="滑动验证"',
          '[class*="captcha"]',
          '[class*="verify"]',
      ]
      login_selectors = [
          'text="请登录"',
          'text="扫码登录"',
          '[class*="login-dialog"]',
      ]
      # (selectors, log label, reported type) — probed in this order
      probes = (
          (captcha_selectors, "检测到验证码", 'image'),
          (login_selectors, "检测到需要登录", 'login'),
      )

      try:
          for selectors, label, captcha_type in probes:
              for selector in selectors:
                  try:
                      if await self.page.locator(selector).count() > 0:
                          print(f"[{self.platform_name}] {label}: {selector}")
                          return {'need_captcha': True, 'captcha_type': captcha_type}
                  except Exception:
                      # A failing selector is skipped.  Exception (not a
                      # bare except) keeps task cancellation propagating.
                      pass
      except Exception as e:
          print(f"[{self.platform_name}] 验证码检测异常: {e}")

      return {'need_captcha': False, 'captcha_type': ''}
  async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
      """Publish a video to WeChat Channels through the web publish page.

      Steps: start the browser, install cookies, open the publish page,
      bail out on login/captcha, locate an upload entry (main frame first,
      then every child frame, with an AI-assisted fallback), type title and
      tags, wait for the upload to finish, upload the cover, fill the short
      title, optionally set the schedule, then click the publish button and
      wait for the redirect to the post list.

      Args:
          cookies: Cookie data accepted by ``self.parse_cookies``.
          params: Publish parameters; ``video_path``, ``title``, ``tags``,
              ``cover_path`` and ``publish_date`` are read.

      Returns:
          A ``PublishResult`` with status ``'success'``, ``'need_captcha'``
          (login or captcha required), ``'failed'`` (no upload entry found)
          or ``'need_action'`` (no redirect observed after publishing).

      Raises:
          Exception: When the page was not initialised or the video file
              does not exist.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 开始发布视频")
      print(f"[{self.platform_name}] 视频路径: {params.video_path}")
      print(f"[{self.platform_name}] 标题: {params.title}")
      print(f"[{self.platform_name}] Headless: {self.headless}")
      print(f"{'='*60}")

      self.report_progress(5, "正在初始化浏览器...")

      # Start the browser (Chrome preferred)
      await self.init_browser()
      print(f"[{self.platform_name}] 浏览器初始化完成")

      # Parse and install cookies.  Only the count is logged: the cookie
      # values are session credentials and must not end up in stdout/logs.
      cookie_list = self.parse_cookies(cookies)
      print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
      await self.set_cookies(cookie_list)

      if not self.page:
          raise Exception("Page not initialized")

      # Make sure the video file exists
      if not os.path.exists(params.video_path):
          raise Exception(f"视频文件不存在: {params.video_path}")
      print(f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")

      self.report_progress(10, "正在打开上传页面...")

      # Open the publish page
      await self.page.goto(self.publish_url, wait_until="networkidle", timeout=60000)
      await asyncio.sleep(3)

      # Redirected to the login page means the cookies are no longer valid
      current_url = self.page.url
      print(f"[{self.platform_name}] 当前页面: {current_url}")
      if "login" in current_url:
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error="Cookie 已过期,需要重新登录",
              need_captcha=True,
              captcha_type='login',
              screenshot_base64=screenshot_base64,
              page_url=current_url,
              status='need_captcha'
          )

      # AI-based captcha check
      ai_captcha = await self.ai_check_captcha()
      if ai_captcha['has_captcha']:
          print(f"[{self.platform_name}] AI检测到验证码: {ai_captcha['captcha_type']}", flush=True)
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error=f"检测到{ai_captcha['captcha_type']}验证码,需要使用有头浏览器完成验证",
              need_captcha=True,
              captcha_type=ai_captcha['captcha_type'],
              screenshot_base64=screenshot_base64,
              page_url=current_url,
              status='need_captcha'
          )

      # Selector-based captcha check
      captcha_result = await self.check_captcha()
      if captcha_result['need_captcha']:
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error=f"需要{captcha_result['captcha_type']}验证码,请使用有头浏览器完成验证",
              need_captcha=True,
              captcha_type=captcha_result['captcha_type'],
              screenshot_base64=screenshot_base64,
              page_url=current_url,
              status='need_captcha'
          )

      self.report_progress(15, "正在选择视频文件...")

      # Upload the video.
      # The publish page DOM varies between accounts/regions/rollouts and the
      # upload widget may live inside an iframe.  So: click to trigger a file
      # chooser, and additionally walk every frame and pick the best-looking
      # file input as a fallback.
      upload_success = False

      if not self.page:
          raise Exception("Page not initialized")

      # Give the upload area time to render (avoid judging too early)
      try:
          await self.page.wait_for_selector("div.upload-content, input[type='file'], iframe", timeout=20000)
      except Exception:
          pass

      async def _try_set_files_in_frame(frame, frame_name: str) -> bool:
          """Try every upload strategy inside one frame; True once the file was handed over."""
          nonlocal upload_success
          if upload_success:
              return True

          # Strategy 0: a selector explicitly configured via environment variable wins
          if WEIXIN_UPLOAD_SELECTOR:
              try:
                  el = frame.locator(WEIXIN_UPLOAD_SELECTOR).first
                  if await el.count() > 0 and await el.is_visible():
                      print(f"[{self.platform_name}] [{frame_name}] 使用环境变量 WEIXIN_UPLOAD_SELECTOR: {WEIXIN_UPLOAD_SELECTOR}")
                      try:
                          async with self.page.expect_file_chooser(timeout=5000) as fc_info:
                              await el.click()
                          chooser = await fc_info.value
                          await chooser.set_files(params.video_path)
                          upload_success = True
                          print(f"[{self.platform_name}] [{frame_name}] 通过环境变量选择器上传成功")
                          return True
                      except Exception as e:
                          print(f"[{self.platform_name}] [{frame_name}] 环境变量选择器点击失败,尝试直接 set_input_files: {e}")
                          try:
                              await el.set_input_files(params.video_path)
                              upload_success = True
                              print(f"[{self.platform_name}] [{frame_name}] 环境变量选择器 set_input_files 成功")
                              return True
                          except Exception as e2:
                              print(f"[{self.platform_name}] [{frame_name}] 环境变量选择器 set_input_files 仍失败: {e2}")
              except Exception as e:
                  print(f"[{self.platform_name}] [{frame_name}] 使用环境变量选择器定位元素失败: {e}")

          # Strategy 1: click a likely upload area to trigger the file chooser
          click_selectors = [
              "div.upload-content",
              "div[class*='upload-content']",
              "div[class*='upload']",
              "div.add-wrap",
              "[class*='uploader']",
              "text=点击上传",
              "text=上传视频",
              "text=选择视频",
          ]
          for selector in click_selectors:
              try:
                  el = frame.locator(selector).first
                  if await el.count() > 0 and await el.is_visible():
                      print(f"[{self.platform_name}] [{frame_name}] 找到可点击上传区域: {selector}")
                      try:
                          async with self.page.expect_file_chooser(timeout=5000) as fc_info:
                              await el.click()
                          chooser = await fc_info.value
                          await chooser.set_files(params.video_path)
                          upload_success = True
                          print(f"[{self.platform_name}] [{frame_name}] 通过 file chooser 上传成功")
                          return True
                      except Exception as e:
                          print(f"[{self.platform_name}] [{frame_name}] 点击触发 chooser 失败: {e}")
              except Exception:
                  pass

          # Strategy 2: set the file directly on an input[type=file]
          # (common for iframes / hidden inputs)
          try:
              inputs = frame.locator("input[type='file']")
              cnt = await inputs.count()
              if cnt > 0:
                  # Score each input: video accept types rank highest
                  best_idx = 0
                  best_score = -1
                  for i in range(cnt):
                      try:
                          inp = inputs.nth(i)
                          accept = (await inp.get_attribute("accept")) or ""
                          multiple = (await inp.get_attribute("multiple")) or ""
                          score = 0
                          if "video" in accept:
                              score += 10
                          if "mp4" in accept:
                              score += 3
                          if multiple:
                              score += 1
                          if score > best_score:
                              best_score = score
                              best_idx = i
                      except Exception:
                          continue
                  target = inputs.nth(best_idx)
                  print(f"[{self.platform_name}] [{frame_name}] 尝试对 input[{best_idx}] set_input_files (score={best_score})")
                  await target.set_input_files(params.video_path)
                  upload_success = True
                  print(f"[{self.platform_name}] [{frame_name}] 通过 file input 上传成功")
                  return True
          except Exception as e:
              print(f"[{self.platform_name}] [{frame_name}] file input 上传失败: {e}")
              # Do not return here so the AI fallback below still gets a chance

          # Strategy 3 (last resort): let the AI guess the upload entry from the HTML
          try:
              frame_url = getattr(frame, "url", "")
              html_full = await frame.content()
              html_for_ai = await self._extract_relevant_html_snippets(html_full)
              print(f"[{self.platform_name}] [{frame_name}] frame_url={frame_url}, html_len={len(html_full)}, html_for_ai_len={len(html_for_ai)}")
              ai_selector = await self.ai_find_upload_selector(html_for_ai, frame_name=frame_name)
              if ai_selector:
                  try:
                      el = frame.locator(ai_selector).first
                      if await el.count() > 0:
                          print(f"[{self.platform_name}] [{frame_name}] 使用 AI 选择器点击上传入口: {ai_selector}")
                          try:
                              async with self.page.expect_file_chooser(timeout=5000) as fc_info:
                                  await el.click()
                              chooser = await fc_info.value
                              await chooser.set_files(params.video_path)
                              upload_success = True
                              print(f"[{self.platform_name}] [{frame_name}] 通过 AI 选择器上传成功")
                              return True
                          except Exception as e:
                              print(f"[{self.platform_name}] [{frame_name}] AI 选择器点击失败,改为直接 set_input_files: {e}")
                              try:
                                  await el.set_input_files(params.video_path)
                                  upload_success = True
                                  print(f"[{self.platform_name}] [{frame_name}] AI 选择器直接 set_input_files 成功")
                                  return True
                              except Exception as e2:
                                  print(f"[{self.platform_name}] [{frame_name}] AI 选择器 set_input_files 仍失败: {e2}")
                  except Exception as e:
                      print(f"[{self.platform_name}] [{frame_name}] 使用 AI 选择器定位元素失败: {e}")
              else:
                  # The AI could not infer a selector from the HTML: build a
                  # list of candidate elements in the page and let it choose.
                  try:
                      candidates = await frame.evaluate("""
                      () => {
                          function cssEscape(s) {
                              try { return CSS.escape(s); } catch (e) { return s.replace(/[^a-zA-Z0-9_-]/g, '\\\\$&'); }
                          }
                          function buildSelector(el) {
                              if (!el || el.nodeType !== 1) return '';
                              if (el.id) return `#${cssEscape(el.id)}`;
                              let parts = [];
                              let cur = el;
                              for (let depth = 0; cur && cur.nodeType === 1 && depth < 5; depth++) {
                                  let part = cur.tagName.toLowerCase();
                                  const role = cur.getAttribute('role');
                                  const type = cur.getAttribute('type');
                                  if (type) part += `[type="${type}"]`;
                                  if (role) part += `[role="${role}"]`;
                                  const cls = (cur.className || '').toString().trim().split(/\\s+/).filter(Boolean);
                                  if (cls.length) part += '.' + cls.slice(0, 2).map(cssEscape).join('.');
                                  // nth-of-type
                                  let idx = 1;
                                  let sib = cur;
                                  while (sib && (sib = sib.previousElementSibling)) {
                                      if (sib.tagName === cur.tagName) idx++;
                                  }
                                  part += `:nth-of-type(${idx})`;
                                  parts.unshift(part);
                                  cur = cur.parentElement;
                              }
                              return parts.join(' > ');
                          }
                          const nodes = Array.from(document.querySelectorAll('input, button, a, div, span'))
                              .filter(el => {
                                  const tag = el.tagName.toLowerCase();
                                  const type = (el.getAttribute('type') || '').toLowerCase();
                                  const role = (el.getAttribute('role') || '').toLowerCase();
                                  const aria = (el.getAttribute('aria-label') || '').toLowerCase();
                                  const txt = (el.innerText || '').trim().slice(0, 60);
                                  const cls = (el.className || '').toString().toLowerCase();
                                  const isFile = tag === 'input' && type === 'file';
                                  const looksClickable =
                                      tag === 'button' || tag === 'a' || role === 'button' || el.onclick ||
                                      cls.includes('upload') || cls.includes('uploader') || cls.includes('drag') ||
                                      aria.includes('上传') || aria.includes('选择') || aria.includes('添加') ||
                                      txt.includes('上传') || txt.includes('选择') || txt.includes('添加') || txt.includes('点击上传');
                                  if (!isFile && !looksClickable) return false;
                                  const r = el.getBoundingClientRect();
                                  const visible = r.width > 5 && r.height > 5;
                                  return visible;
                              });
                          const limited = nodes.slice(0, 120).map(el => ({
                              css: buildSelector(el),
                              tag: el.tagName.toLowerCase(),
                              type: el.getAttribute('type') || '',
                              role: el.getAttribute('role') || '',
                              ariaLabel: el.getAttribute('aria-label') || '',
                              text: (el.innerText || '').trim().slice(0, 80),
                              id: el.id || '',
                              className: (el.className || '').toString().slice(0, 120),
                              accept: el.getAttribute('accept') || '',
                          }));
                          return limited;
                      }
                      """)
                      ai_selector2 = await self.ai_pick_selector_from_candidates(
                          candidates=candidates,
                          goal="上传视频入口",
                          frame_name=frame_name
                      )
                      if ai_selector2:
                          el2 = frame.locator(ai_selector2).first
                          if await el2.count() > 0:
                              print(f"[{self.platform_name}] [{frame_name}] 使用 AI 候选选择器点击上传入口: {ai_selector2}")
                              try:
                                  async with self.page.expect_file_chooser(timeout=5000) as fc_info:
                                      await el2.click()
                                  chooser2 = await fc_info.value
                                  await chooser2.set_files(params.video_path)
                                  upload_success = True
                                  print(f"[{self.platform_name}] [{frame_name}] 通过 AI 候选选择器上传成功")
                                  return True
                              except Exception as e:
                                  print(f"[{self.platform_name}] [{frame_name}] AI 候选选择器点击失败,尝试 set_input_files: {e}")
                                  try:
                                      await el2.set_input_files(params.video_path)
                                      upload_success = True
                                      print(f"[{self.platform_name}] [{frame_name}] AI 候选选择器 set_input_files 成功")
                                      return True
                                  except Exception as e2:
                                      print(f"[{self.platform_name}] [{frame_name}] AI 候选选择器 set_input_files 仍失败: {e2}")
                  except Exception as e:
                      print(f"[{self.platform_name}] [{frame_name}] 构造候选并交给 AI 失败: {e}")
          except Exception as e:
              print(f"[{self.platform_name}] [{frame_name}] AI 上传入口识别整体失败: {e}")

          return False

      # Main frame first
      try:
          await _try_set_files_in_frame(self.page.main_frame, "main")
      except Exception as e:
          print(f"[{self.platform_name}] main frame 上传尝试异常: {e}")

      # Then every child frame
      if not upload_success:
          try:
              frames = self.page.frames
              print(f"[{self.platform_name}] 发现 frames: {len(frames)}")
              for idx, fr in enumerate(frames):
                  if upload_success:
                      break
                  # The main frame was already tried above
                  if fr == self.page.main_frame:
                      continue
                  name = fr.name or f"frame-{idx}"
                  await _try_set_files_in_frame(fr, name)
          except Exception as e:
              print(f"[{self.platform_name}] 遍历 frames 异常: {e}")

      if not upload_success:
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error="未找到上传入口(可能在 iframe 中或页面结构已变更)",
              screenshot_base64=screenshot_base64,
              page_url=await self.get_page_url(),
              status='failed'
          )

      self.report_progress(20, "正在填充标题和话题...")

      # Title and topics
      await self.add_title_tags(params)

      self.report_progress(30, "等待视频上传完成...")

      # Poll until the publish button is enabled (upload finished)
      for _ in range(120):
          try:
              button_info = await self.page.get_by_role("button", name="发表").get_attribute('class')
              if "weui-desktop-btn_disabled" not in button_info:
                  print(f"[{self.platform_name}] 视频上传完毕")
                  # Upload the cover
                  self.report_progress(50, "正在上传封面...")
                  await self.upload_cover(params.cover_path)
                  break
              else:
                  # Re-upload when the page reports an upload error
                  if await self.page.locator('div.status-msg.error').count():
                      if await self.page.locator('div.media-status-content div.tag-inner:has-text("删除")').count():
                          await self.handle_upload_error(params.video_path)
                  await asyncio.sleep(3)
          except Exception:
              # Exception rather than a bare except: retry on page errors but
              # let task cancellation / KeyboardInterrupt propagate.
              await asyncio.sleep(3)

      self.report_progress(60, "处理视频设置...")

      # Short title
      try:
          short_title_el = self.page.get_by_text("短标题", exact=True).locator("..").locator(
              "xpath=following-sibling::div").locator('span input[type="text"]')
          if await short_title_el.count():
              short_title = format_short_title(params.title)
              await short_title_el.fill(short_title)
      except Exception:
          # Best effort; see the note on the upload polling loop above
          pass

      # Scheduled publishing
      if params.publish_date:
          self.report_progress(70, "设置定时发布...")
          await self.set_schedule_time(params.publish_date)

      self.report_progress(80, "正在发布...")

      # Click publish and wait for the redirect to the post list
      for i in range(30):
          try:
              publish_btn = self.page.locator('div.form-btns button:has-text("发表")')
              if await publish_btn.count():
                  print(f"[{self.platform_name}] 点击发布按钮...")
                  await publish_btn.click()

              # Success is signalled by navigation to the post list page
              await self.page.wait_for_url(
                  "https://channels.weixin.qq.com/platform/post/list",
                  timeout=10000
              )
              self.report_progress(100, "发布成功")
              print(f"[{self.platform_name}] 视频发布成功!")
              screenshot_base64 = await self.capture_screenshot()
              return PublishResult(
                  success=True,
                  platform=self.platform_name,
                  message="发布成功",
                  screenshot_base64=screenshot_base64,
                  page_url=self.page.url,
                  status='success'
              )
          except Exception as e:
              # The wait may fail even though the redirect already happened
              current_url = self.page.url
              if "https://channels.weixin.qq.com/platform/post/list" in current_url:
                  self.report_progress(100, "发布成功")
                  print(f"[{self.platform_name}] 视频发布成功!")
                  screenshot_base64 = await self.capture_screenshot()
                  return PublishResult(
                      success=True,
                      platform=self.platform_name,
                      message="发布成功",
                      screenshot_base64=screenshot_base64,
                      page_url=current_url,
                      status='success'
                  )
              else:
                  print(f"[{self.platform_name}] 视频正在发布中... {i+1}/30, URL: {current_url}")
                  await asyncio.sleep(1)

      # No redirect observed within the retry budget
      screenshot_base64 = await self.capture_screenshot()
      page_url = await self.get_page_url()
      return PublishResult(
          success=False,
          platform=self.platform_name,
          error="发布超时,请检查发布状态",
          screenshot_base64=screenshot_base64,
          page_url=page_url,
          status='need_action'
      )
  async def _get_works_fallback_dom(self, page_size: int) -> tuple:
      """Scrape the works list from the current page DOM when the API failed.

      Waits for the first of several feed selectors to appear, then reads
      cover, title, publish time and counters from up to ``page_size`` feed
      items.  Items that fail to parse are logged and skipped.

      Args:
          page_size: Maximum number of items to read.

      Returns:
          ``(works, total, has_more, next_page)`` where ``total`` is the
          number of parsed items, ``has_more`` is True when the page holds
          more items than ``page_size`` and ``next_page`` is always "".
      """
      import hashlib

      def _stable_digest(text: str) -> str:
          """Short digest that stays identical across interpreter runs."""
          return hashlib.sha256(text.encode("utf-8", "replace")).hexdigest()[:12]

      works: List[WorkItem] = []
      total = 0
      has_more = False
      try:
          # Wait for whichever feed selector shows up first
          for selector in ["div.post-feed-item", "[class*='post-feed']", "[class*='feed-item']", "div[class*='post']"]:
              try:
                  await self.page.wait_for_selector(selector, timeout=8000)
                  break
              except Exception:
                  continue

          post_items = self.page.locator("div.post-feed-item")
          item_count = await post_items.count()
          if item_count == 0:
              post_items = self.page.locator("[class*='post-feed']")
              item_count = await post_items.count()

          for i in range(min(item_count, page_size)):
              try:
                  item = post_items.nth(i)

                  # Cover: dedicated thumbnail first, any image as fallback
                  cover_el = item.locator("div.media img.thumb").first
                  cover_url = await cover_el.get_attribute("src") or "" if await cover_el.count() > 0 else ""
                  if not cover_url:
                      cover_el = item.locator("img").first
                      cover_url = await cover_el.get_attribute("src") or "" if await cover_el.count() > 0 else ""

                  title_el = item.locator("div.post-title").first
                  title = (await title_el.text_content() or "").strip() if await title_el.count() > 0 else ""
                  time_el = item.locator("div.post-time span").first
                  publish_time = (await time_el.text_content() or "").strip() if await time_el.count() > 0 else ""

                  # Each data item is identified by the icon it contains
                  play_count = like_count = comment_count = share_count = collect_count = 0
                  data_items = item.locator("div.post-data div.data-item")
                  for j in range(await data_items.count()):
                      data_item = data_items.nth(j)
                      count_text = (await data_item.locator("span.count").text_content() or "0").strip()
                      if await data_item.locator("span.weui-icon-outlined-eyes-on").count() > 0:
                          play_count = self._parse_count(count_text)
                      elif await data_item.locator("span.weui-icon-outlined-like").count() > 0:
                          like_count = self._parse_count(count_text)
                      elif await data_item.locator("span.weui-icon-outlined-comment").count() > 0:
                          comment_count = self._parse_count(count_text)
                      elif await data_item.locator("use[xlink\\:href='#icon-share']").count() > 0:
                          share_count = self._parse_count(count_text)
                      elif await data_item.locator("use[xlink\\:href='#icon-thumb']").count() > 0:
                          collect_count = self._parse_count(count_text)

                  # Built-in hash() of a str is salted per process, which made
                  # the synthetic ID change on every run; use a stable digest.
                  work_id = f"weixin_{i}_{_stable_digest(title)}_{_stable_digest(publish_time)}"
                  works.append(WorkItem(
                      work_id=work_id,
                      title=title or "无标题",
                      cover_url=cover_url,
                      duration=0,
                      status="published",
                      publish_time=publish_time,
                      play_count=play_count,
                      like_count=like_count,
                      comment_count=comment_count,
                      share_count=share_count,
                      collect_count=collect_count,
                  ))
              except Exception as e:
                  print(f"[{self.platform_name}] DOM 解析作品 {i} 失败: {e}", flush=True)
                  continue

          total = len(works)
          has_more = item_count > page_size
          print(f"[{self.platform_name}] DOM 回退获取 {len(works)} 条", flush=True)
      except Exception as e:
          print(f"[{self.platform_name}] DOM 回退失败: {e}", flush=True)
      return (works, total, has_more, "")
  3271. async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
  3272. """获取视频号作品列表(调用 post_list 接口)
  3273. page: 页码从 0 开始,或上一页返回的 rawKeyBuff/lastBuff 字符串
  3274. """
  3275. # 分页:首页 currentPage=1/rawKeyBuff=null,下一页用 currentPage 递增或 rawKeyBuff
  3276. if page is None or page == "" or (isinstance(page, int) and page == 0):
  3277. current_page = 1
  3278. raw_key_buff = None
  3279. elif isinstance(page, int):
  3280. current_page = page + 1
  3281. raw_key_buff = None
  3282. else:
  3283. current_page = 1
  3284. raw_key_buff = str(page)
  3285. ts_ms = str(int(time.time() * 1000))
  3286. print(f"\n{'='*60}")
  3287. print(f"[{self.platform_name}] 获取作品列表 currentPage={current_page}, pageSize={page_size}, rawKeyBuff={raw_key_buff[:40] if raw_key_buff else 'null'}...")
  3288. print(f"{'='*60}")
  3289. works: List[WorkItem] = []
  3290. total = 0
  3291. has_more = False
  3292. next_page = ""
  3293. try:
  3294. await self.init_browser()
  3295. cookie_list = self.parse_cookies(cookies)
  3296. await self.set_cookies(cookie_list)
  3297. if not self.page:
  3298. raise Exception("Page not initialized")
  3299. await self.page.goto("https://channels.weixin.qq.com/platform/post/list", timeout=30000)
  3300. await asyncio.sleep(3)
  3301. current_url = self.page.url
  3302. if "login" in current_url:
  3303. raise Exception("Cookie 已过期,请重新登录")
  3304. api_url = "https://channels.weixin.qq.com/micro/content/cgi-bin/mmfinderassistant-bin/post/post_list"
  3305. req_body = {
  3306. "pageSize": page_size,
  3307. "currentPage": current_page,
  3308. "userpageType": 11,
  3309. "stickyOrder": True,
  3310. "timestamp": ts_ms,
  3311. "_log_finder_uin": "",
  3312. "_log_finder_id": "",
  3313. "rawKeyBuff": raw_key_buff,
  3314. "pluginSessionId": None,
  3315. "scene": 7,
  3316. "reqScene": 7,
  3317. }
  3318. body_str = json.dumps(req_body)
  3319. response = await self.page.evaluate("""
  3320. async ([url, bodyStr]) => {
  3321. try {
  3322. const resp = await fetch(url, {
  3323. method: 'POST',
  3324. credentials: 'include',
  3325. headers: {
  3326. 'Content-Type': 'application/json',
  3327. 'Accept': '*/*',
  3328. 'Referer': 'https://channels.weixin.qq.com/platform/post/list'
  3329. },
  3330. body: bodyStr
  3331. });
  3332. return await resp.json();
  3333. } catch (e) {
  3334. return { error: e.toString() };
  3335. }
  3336. }
  3337. """, [api_url, body_str])
  3338. is_first_page = current_page == 1 and raw_key_buff is None
  3339. if response.get("error"):
  3340. print(f"[{self.platform_name}] API 请求失败: {response.get('error')}", flush=True)
  3341. if is_first_page:
  3342. works, total, has_more, next_page = await self._get_works_fallback_dom(page_size)
  3343. if works:
  3344. return WorksResult(success=True, platform=self.platform_name, works=works, total=total, has_more=has_more, next_page=next_page)
  3345. return WorksResult(success=False, platform=self.platform_name, error=response.get("error", "API 请求失败"))
  3346. err_code = response.get("errCode", -1)
  3347. if err_code != 0:
  3348. err_msg = response.get("errMsg", "unknown")
  3349. print(f"[{self.platform_name}] API errCode={err_code}, errMsg={err_msg}, 完整响应(前800字): {json.dumps(response, ensure_ascii=False)[:800]}", flush=True)
  3350. if is_first_page:
  3351. works, total, has_more, next_page = await self._get_works_fallback_dom(page_size)
  3352. if works:
  3353. return WorksResult(success=True, platform=self.platform_name, works=works, total=total, has_more=has_more, next_page=next_page)
  3354. return WorksResult(success=False, platform=self.platform_name, error=f"errCode={err_code}, errMsg={err_msg}")
  3355. data = response.get("data") or {}
  3356. raw_list = data.get("list") or []
  3357. total = int(data.get("totalCount") or 0)
  3358. has_more = bool(data.get("continueFlag", False))
  3359. next_page = (data.get("lastBuff") or "").strip()
  3360. print(f"[{self.platform_name}] API 响应: list_len={len(raw_list)}, totalCount={total}, continueFlag={has_more}, lastBuff={next_page[:50] if next_page else ''}...")
  3361. if is_first_page and len(raw_list) == 0:
  3362. works_fb, total_fb, has_more_fb, _ = await self._get_works_fallback_dom(page_size)
  3363. if works_fb:
  3364. return WorksResult(success=True, platform=self.platform_name, works=works_fb, total=total_fb, has_more=has_more_fb, next_page="")
  3365. for item in raw_list:
  3366. try:
  3367. # 存 works.platform_video_id 统一用 post_list 接口回参中的 exportId(如 export/xxx)
  3368. work_id = str(item.get("exportId") or item.get("objectId") or item.get("id") or "").strip()
  3369. if not work_id:
  3370. work_id = f"weixin_{hash(item.get('createTime',0))}_{hash(item.get('desc', {}).get('description',''))}"
  3371. desc = item.get("desc") or {}
  3372. title = (desc.get("description") or "").strip() or "无标题"
  3373. cover_url = ""
  3374. duration = 0
  3375. media_list = desc.get("media") or []
  3376. if media_list and isinstance(media_list[0], dict):
  3377. m = media_list[0]
  3378. cover_url = (m.get("coverUrl") or m.get("thumbUrl") or "").strip()
  3379. duration = int(m.get("videoPlayLen") or 0)
  3380. create_ts = item.get("createTime") or 0
  3381. if isinstance(create_ts, (int, float)) and create_ts:
  3382. publish_time = datetime.fromtimestamp(create_ts).strftime("%Y-%m-%d %H:%M:%S")
  3383. else:
  3384. publish_time = str(create_ts) if create_ts else ""
  3385. read_count = int(item.get("readCount") or 0)
  3386. like_count = int(item.get("likeCount") or 0)
  3387. comment_count = int(item.get("commentCount") or 0)
  3388. forward_count = int(item.get("forwardCount") or 0)
  3389. fav_count = int(item.get("favCount") or 0)
  3390. works.append(WorkItem(
  3391. work_id=work_id,
  3392. title=title,
  3393. cover_url=cover_url,
  3394. duration=duration,
  3395. status="published",
  3396. publish_time=publish_time,
  3397. play_count=read_count,
  3398. like_count=like_count,
  3399. comment_count=comment_count,
  3400. share_count=forward_count,
  3401. collect_count=fav_count,
  3402. ))
  3403. except Exception as e:
  3404. print(f"[{self.platform_name}] 解析作品项失败: {e}", flush=True)
  3405. continue
  3406. if total == 0 and works:
  3407. total = len(works)
  3408. print(f"[{self.platform_name}] 本页获取 {len(works)} 条,totalCount={total}, next_page={bool(next_page)}")
  3409. except Exception as e:
  3410. import traceback
  3411. traceback.print_exc()
  3412. return WorksResult(success=False, platform=self.platform_name, error=str(e))
  3413. return WorksResult(success=True, platform=self.platform_name, works=works, total=total, has_more=has_more, next_page=next_page)
  async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
      """
      Fetch the comments of one WeChat Channels work via the comment-management page.

      The method opens the comment page, captures the ``post_list`` response issued
      by the page, locates the feed entry that matches ``work_id``, clicks it,
      captures the resulting ``comment_list`` response and flattens first- and
      second-level comments into ``CommentItem`` objects.

      Args:
          cookies: Login cookies in a format accepted by ``self.parse_cookies``.
          work_id: Identifier of the work. It is compared against each post's
              ``exportId``, ``objectId`` and ``objectNonce``.
          cursor: Accepted for interface compatibility; not used here.

      Returns:
          CommentsResult: ``success=True`` with the extracted comments (an empty
          list when no post matches ``work_id``), or ``success=False`` with
          ``error`` describing what went wrong.
      """
      import traceback

      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品评论")
      print(f"[{self.platform_name}] work_id={work_id}")
      print(f"{'='*60}")

      def _post_matches(post) -> bool:
          """Return True when a non-empty identifier of ``post`` matches ``work_id``."""
          if not isinstance(post, dict):
              return False
          # exportId is what get_works stores as the work id, so it is checked too.
          for key in ("exportId", "objectId", "objectNonce"):
              candidate = str(post.get(key) or "").strip()
              # Empty identifiers must be skipped: "" is a substring of every
              # string and would make every post match.
              if candidate and (candidate == work_id or candidate in work_id):
                  return True
          return False

      comments: List[CommentItem] = []
      total = 0
      has_more = False
      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          # Open the comment-management page
          print(f"[{self.platform_name}] 正在打开评论页面...")
          await self.page.goto("https://channels.weixin.qq.com/platform/interaction/comment", timeout=30000)
          await asyncio.sleep(2)

          # Being redirected to a login URL means the cookies are no longer valid
          current_url = self.page.url
          if "login" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          # === Step 1: capture the post_list response to obtain the work list ===
          # NOTE(review): the listener is registered after goto() + sleep; if the
          # page already issued post_list by then this wait times out — confirm.
          posts = []
          try:
              async with self.page.expect_response(
                  lambda res: "/post/post_list" in res.url,
                  timeout=20000
              ) as post_resp_info:
                  await self.page.wait_for_selector('.scroll-list .comment-feed-wrap', timeout=15000)
              post_resp = await post_resp_info.value
              post_data = await post_resp.json()
              if post_data.get("errCode") == 0:
                  # "data" / "list" may be present but null
                  posts = (post_data.get("data") or {}).get("list") or []
                  print(f"[{self.platform_name}] ✅ 获取 {len(posts)} 个作品")
              else:
                  err_msg = post_data.get("errMsg", "未知错误")
                  print(f"[{self.platform_name}] ❌ post_list 业务错误: {err_msg}")
                  return CommentsResult(
                      success=False,
                      platform=self.platform_name,
                      work_id=work_id,
                      error=f"post_list 业务错误: {err_msg}"
                  )
          except Exception as e:
              print(f"[{self.platform_name}] ❌ 获取 post_list 失败: {e}")
              return CommentsResult(
                  success=False,
                  platform=self.platform_name,
                  work_id=work_id,
                  error=f"获取 post_list 失败: {e}"
              )

          # === Step 2: find the target work among the DOM feed entries ===
          feed_wraps = await self.page.query_selector_all('.scroll-list .comment-feed-wrap')
          target_feed = None
          target_post = None
          target_index = -1
          # DOM entries and API posts are paired by position; zip() stops at the
          # shorter sequence, so unpaired DOM nodes are ignored.
          for i, (feed, post) in enumerate(zip(feed_wraps, posts)):
              if _post_matches(post):
                  target_feed = feed
                  target_post = post
                  target_index = i
                  work_title = (post.get("desc") or {}).get("description", "无标题")
                  print(f"[{self.platform_name}] ✅ 找到目标作品: {work_title}")
                  # Stop at the first match so a later entry cannot override it
                  break

          if target_feed is None or target_post is None:
              print(f"[{self.platform_name}] ❌ 未找到 work_id={work_id} 对应的作品")
              return CommentsResult(
                  success=True,
                  platform=self.platform_name,
                  work_id=work_id,
                  comments=[],
                  total=0,
                  has_more=False
              )

          # Work information attached to every extracted comment
          object_nonce = target_post.get("objectNonce", f"nonce_{target_index}")
          work_title = (target_post.get("desc") or {}).get("description", f"作品{target_index+1}")
          work_info = {
              "work_id": object_nonce,
              "work_title": work_title
          }

          # === Step 3: click the work to trigger the comment_list request ===
          content_wrap = await target_feed.query_selector('.feed-content') or target_feed
          try:
              async with self.page.expect_response(
                  lambda res: "/comment/comment_list" in res.url,
                  timeout=15000
              ) as comment_resp_info:
                  await content_wrap.click()
                  await asyncio.sleep(0.8)
              comment_resp = await comment_resp_info.value
              comment_data = await comment_resp.json()
              if comment_data.get("errCode") != 0:
                  err_msg = comment_data.get("errMsg", "未知错误")
                  print(f"[{self.platform_name}] ❌ 评论接口错误: {err_msg}")
                  return CommentsResult(
                      success=False,
                      platform=self.platform_name,
                      work_id=work_id,
                      error=f"评论接口错误: {err_msg}"
                  )

              comment_payload = comment_data.get("data") or {}
              raw_comments = comment_payload.get("comment") or []
              total = comment_payload.get("totalCount")
              if total is None:
                  total = len(raw_comments)
              print(f"[{self.platform_name}] 📊 原始评论数: {len(raw_comments)}, 总数: {total}")

              # === Step 4: flatten all comments, replies included ===
              extracted = self._extract_comments(raw_comments, parent_id="", work_info=work_info)

              # === Step 5: convert to CommentItem objects ===
              for c in extracted:
                  comment_item = CommentItem(
                      comment_id=c.get("comment_id", ""),
                      parent_comment_id=c.get("parent_comment_id", ""),
                      work_id=work_id,
                      content=c.get("content", ""),
                      author_id=c.get("username", ""),  # username doubles as author_id
                      author_name=c.get("nickname", ""),
                      author_avatar=c.get("avatar", ""),
                      like_count=c.get("like_count", 0),
                      reply_count=0,
                      create_time=c.get("create_time", ""),
                  )
                  # Extra attributes set on the item after construction
                  comment_item.is_author = c.get("is_author", False)
                  comment_item.create_time_unix = c.get("create_time_unix", 0)
                  comment_item.work_title = c.get("work_title", "")
                  print(comment_item)
                  comments.append(comment_item)

                  # Log one line per comment
                  author_tag = " 👤(作者)" if c.get("is_author") else ""
                  parent_tag = f" [回复: {c.get('parent_comment_id', '')}]" if c.get("parent_comment_id") else ""
                  print(f"[{self.platform_name}] - [{c.get('nickname', '')}] {c.get('content', '')[:30]}... "
                        f"({c.get('create_time', '')}){author_tag}{parent_tag}")

              # Prefer the API's continueFlag; otherwise compare counts
              has_more = bool(comment_payload.get("continueFlag", False)) or len(extracted) < total
              print(f"[{self.platform_name}] ✅ 共提取 {len(comments)} 条评论(含子评论)")
          except Exception as e:
              print(f"[{self.platform_name}] ❌ 获取评论失败: {e}")
              traceback.print_exc()
              return CommentsResult(
                  success=False,
                  platform=self.platform_name,
                  work_id=work_id,
                  error=f"获取评论失败: {e}"
              )
      except Exception as e:
          traceback.print_exc()
          return CommentsResult(
              success=False,
              platform=self.platform_name,
              work_id=work_id,
              error=str(e)
          )

      return CommentsResult(
          success=True,
          platform=self.platform_name,
          work_id=work_id,
          comments=comments,
          total=total,
          has_more=has_more
      )
  3591. def _extract_comments(self, comment_list: list, parent_id: str = "", work_info: dict = None) -> list:
  3592. """
  3593. 递归提取一级和二级评论(完全参考 get_weixin_work_comments.py 的 extract_comments 函数)
  3594. Args:
  3595. comment_list: 评论列表(原始接口数据)
  3596. parent_id: 父评论ID(一级评论为空字符串"",二级评论为父级评论ID)
  3597. work_info: 作品信息字典
  3598. Returns:
  3599. list: 扁平化的评论列表,包含一级和二级评论
  3600. """
  3601. result = []
  3602. # 获取当前用户 username(用于判断是否为作者)
  3603. # 优先从环境变量获取,也可通过其他方式配置
  3604. my_username = getattr(self, 'my_username', '') or os.environ.get('WEIXIN_MY_USERNAME', '')
  3605. for cmt in comment_list:
  3606. # 处理时间戳
  3607. create_ts = int(cmt.get("commentCreatetime", 0) or 0)
  3608. readable_time = (
  3609. datetime.fromtimestamp(create_ts).strftime('%Y-%m-%d %H:%M:%S')
  3610. if create_ts > 0 else ""
  3611. )
  3612. # 判断是否作者(如果配置了 my_username)
  3613. username = cmt.get("username", "") or ""
  3614. is_author = (my_username != "") and (username == my_username)
  3615. # 构建评论条目 - 完全参考 get_weixin_work_comments.py 的字段
  3616. entry = {
  3617. "work_id": work_info.get("work_id", "") if work_info else "",
  3618. "work_title": work_info.get("work_title", "") if work_info else "",
  3619. "comment_id": cmt.get("commentId"),
  3620. "parent_comment_id": parent_id, # 关键:一级评论为空字符串"",二级评论为父评论ID
  3621. "username": username,
  3622. "nickname": cmt.get("commentNickname", ""),
  3623. "avatar": cmt.get("commentHeadurl", ""),
  3624. "content": cmt.get("commentContent", ""),
  3625. "create_time_unix": create_ts,
  3626. "create_time": readable_time,
  3627. "is_author": is_author,
  3628. "like_count": cmt.get("commentLikeCount", 0) or 0
  3629. }
  3630. result.append(entry)
  3631. # 递归处理二级评论(levelTwoComment)
  3632. # 关键:二级评论的 parent_id 应该是当前这条评论的 comment_id
  3633. level_two = cmt.get("levelTwoComment", []) or []
  3634. if level_two and isinstance(level_two, list) and len(level_two) > 0:
  3635. # 当前评论的 ID 作为其子评论的 parent_id
  3636. current_comment_id = cmt.get("commentId", "")
  3637. result.extend(
  3638. self._extract_comments(level_two, parent_id=current_comment_id, work_info=work_info)
  3639. )
  3640. return result
  async def auto_reply_private_messages(self, cookies: str) -> dict:
      """
      Open the private-message page and reply to the sessions of both tabs.

      Args:
          cookies: Login cookies in a format accepted by ``self.parse_cookies``.

      Returns:
          dict: ``{'success': True, 'platform', 'replied_count', 'message'}`` on
          success, or ``{'success': False, 'platform', 'error'}`` when any step
          raises.
      """
      import traceback

      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 开始自动回复私信")
      print(f"{'='*60}")
      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          # Open the private-message page
          await self.page.goto("https://channels.weixin.qq.com/platform/private_msg", timeout=30000)
          await asyncio.sleep(3)

          # Being redirected to a login URL means the cookies are no longer valid
          current_url = self.page.url
          print(f"[{self.platform_name}] 当前 URL: {current_url}")
          if "login" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          # Wait for the page to load, trying a second selector before giving up.
          # ``except Exception`` (not a bare ``except``) so that task cancellation
          # and KeyboardInterrupt are not swallowed by the fallback.
          try:
              await self.page.wait_for_selector('.private-msg-list-header', timeout=15000)
          except Exception:
              try:
                  await self.page.wait_for_selector('.weui-desktop-tab__navs__inner', timeout=10000)
                  print(f"[{self.platform_name}] 使用备用选择器加载成功")
              except Exception as load_err:
                  # Save a screenshot for debugging before failing
                  screenshot_path = f"weixin_private_msg_{int(asyncio.get_running_loop().time())}.png"
                  await self.page.screenshot(path=screenshot_path)
                  print(f"[{self.platform_name}] 页面加载失败,截图: {screenshot_path}")
                  raise Exception(f"私信页面加载超时,当前 URL: {current_url}") from load_err
          print(f"[{self.platform_name}] 私信页面加载完成")

          # Process both tabs and add up the replies sent
          total_replied = 0
          for tab_name in ["打招呼消息", "私信"]:
              total_replied += await self._process_tab_sessions(tab_name)

          print(f"[{self.platform_name}] 自动回复完成,共回复 {total_replied} 条消息")
          return {
              'success': True,
              'platform': self.platform_name,
              'replied_count': total_replied,
              'message': f'成功回复 {total_replied} 条私信'
          }
      except Exception as e:
          traceback.print_exc()
          return {
              'success': False,
              'platform': self.platform_name,
              'error': str(e)
          }
  async def _process_tab_sessions(self, tab_name: str) -> int:
      """
      Walk through every session of one private-message tab and reply where needed.

      A session gets a reply when its history is empty or its last message was
      not sent by the author. The reply text comes from
      ``_generate_reply_with_ai`` and, if that yields an empty string, from
      ``_generate_reply``.

      Args:
          tab_name: "私信" (first tab) or "打招呼消息" (second tab); any other
              value returns 0.

      Returns:
          int: Number of replies that were sent.
      """
      print(f"\n🔄 正在处理「{tab_name}」中的所有会话...")
      if not self.page:
          return 0
      replied_count = 0
      try:
          # Position of each supported tab inside the tab bar
          tab_positions = {"私信": 0, "打招呼消息": 1}
          if tab_name not in tab_positions:
              return 0
          tab_link = (
              self.page.locator('.weui-desktop-tab__navs__inner li')
              .nth(tab_positions[tab_name])
              .locator('a')
          )
          if await tab_link.is_visible():
              await tab_link.click()
              print(f" ➤ 已点击「{tab_name}」tab")
          else:
              print(f" ❌ 「{tab_name}」tab 不可见")
              return 0

          # Wait until either sessions or the empty-state text is rendered.
          # ``except Exception`` keeps task cancellation from being swallowed.
          try:
              await self.page.wait_for_function("""
                  () => {
                      const hasSession = document.querySelectorAll('.session-wrap').length > 0;
                      const hasEmpty = !!document.querySelector('.empty-text');
                      return hasSession || hasEmpty;
                  }
              """, timeout=8000)
              print(" ✅ 会话列表区域已加载")
          except Exception:
              print(" ⚠️ 等待会话列表超时,继续尝试读取...")

          # Count the sessions currently listed
          session_wraps = self.page.locator('.session-wrap')
          session_count = await session_wraps.count()
          print(f" 💬 共找到 {session_count} 个会话")
          if session_count == 0:
              return 0

          for idx in range(session_count):
              try:
                  # Re-query on every iteration; stop if the list has shrunk
                  current_sessions = self.page.locator('.session-wrap')
                  if idx >= await current_sessions.count():
                      break
                  session = current_sessions.nth(idx)
                  user_name = await session.locator('.name').inner_text()
                  last_preview = await session.locator('.feed-info').inner_text()
                  print(f"\n ➤ [{idx+1}/{session_count}] 正在处理: {user_name} | 最后消息: {last_preview}")
                  await session.click()
                  await asyncio.sleep(2)

                  # Read the chat history of the opened session
                  history = await self._extract_chat_history()
                  need_reply = (not history) or (not history[-1]["is_author"])
                  if not need_reply:
                      print(" ➤ 最后一条是我发的,跳过回复")
                      continue

                  reply_text = await self._generate_reply_with_ai(history)
                  if reply_text == "":
                      reply_text = self._generate_reply(history)
                  if not reply_text:
                      print(" ➤ 无需回复")
                      continue

                  print(f" 📝 回复内容: {reply_text}")
                  try:
                      textarea = self.page.locator('.edit_area').first
                      send_btn = self.page.locator('button:has-text("发送")').first
                      if await textarea.is_visible() and await send_btn.is_visible():
                          await textarea.fill(reply_text)
                          await asyncio.sleep(0.5)
                          await send_btn.click()
                          print(" ✅ 已发送")
                          replied_count += 1
                          await asyncio.sleep(1.5)
                      else:
                          print(" ❌ 输入框或发送按钮不可见")
                  except Exception as e:
                      print(f" ❌ 发送失败: {e}")
              except Exception as e:
                  # One broken session must not stop the remaining ones
                  print(f" ❌ 处理会话 {idx+1} 时出错: {e}")
                  continue
      except Exception as e:
          print(f"❌ 处理「{tab_name}」失败: {e}")
      return replied_count
  3783. async def _extract_chat_history(self) -> list:
  3784. """精准提取聊天记录,区分作者(自己)和用户"""
  3785. if not self.page:
  3786. return []
  3787. history = []
  3788. message_wrappers = self.page.locator('.session-content-wrapper > div:not(.footer) > .text-wrapper')
  3789. count = await message_wrappers.count()
  3790. for i in range(count):
  3791. try:
  3792. wrapper = message_wrappers.nth(i)
  3793. # 判断方向
  3794. is_right = await wrapper.locator('.content-right').count() > 0
  3795. is_left = await wrapper.locator('.content-left').count() > 0
  3796. if not (is_left or is_right):
  3797. continue
  3798. # 提取消息文本
  3799. pre_el = wrapper.locator('pre.message-plain')
  3800. content = ''
  3801. if await pre_el.count() > 0:
  3802. content = await pre_el.inner_text()
  3803. content = content.strip()
  3804. if not content:
  3805. continue
  3806. # 获取头像
  3807. avatar_img = wrapper.locator('.avatar').first
  3808. avatar_src = ''
  3809. if await avatar_img.count() > 0:
  3810. avatar_src = await avatar_img.get_attribute("src") or ''
  3811. # 右侧 = 作者(自己)
  3812. is_author = is_right
  3813. # 获取用户名
  3814. if is_left:
  3815. name_el = wrapper.locator('.profile .name')
  3816. author_name = '用户'
  3817. if await name_el.count() > 0:
  3818. author_name = await name_el.inner_text()
  3819. else:
  3820. author_name = "我"
  3821. history.append({
  3822. "author": author_name,
  3823. "content": content,
  3824. "is_author": is_author,
  3825. "avatar": avatar_src
  3826. })
  3827. except Exception as e:
  3828. print(f" ⚠️ 解析第 {i+1} 条消息失败: {e}")
  3829. continue
  3830. return history
  3831. async def _generate_reply_with_ai(self, chat_history: list) -> str:
  3832. """使用 AI 生成智能回复"""
  3833. import requests
  3834. import json
  3835. try:
  3836. # 获取 AI 配置
  3837. ai_api_key = os.environ.get('DASHSCOPE_API_KEY', '')
  3838. ai_base_url = os.environ.get('DASHSCOPE_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
  3839. ai_model = os.environ.get('AI_MODEL', 'qwen-plus')
  3840. if not ai_api_key:
  3841. print("⚠️ 未配置 AI API Key,使用规则回复")
  3842. return self._generate_reply(chat_history)
  3843. # 构建对话上下文
  3844. messages = [{"role": "system", "content": "你是一个友好的微信视频号创作者助手,负责回复粉丝私信。请保持简洁、友好、专业的语气。回复长度不超过20字。"}]
  3845. for msg in chat_history:
  3846. role = "assistant" if msg["is_author"] else "user"
  3847. messages.append({
  3848. "role": role,
  3849. "content": msg["content"]
  3850. })
  3851. # 调用 AI API
  3852. headers = {
  3853. 'Authorization': f'Bearer {ai_api_key}',
  3854. 'Content-Type': 'application/json'
  3855. }
  3856. payload = {
  3857. "model": ai_model,
  3858. "messages": messages,
  3859. "max_tokens": 150,
  3860. "temperature": 0.8
  3861. }
  3862. print(" 🤖 正在调用 AI 生成回复...")
  3863. response = requests.post(
  3864. f"{ai_base_url}/chat/completions",
  3865. headers=headers,
  3866. json=payload,
  3867. timeout=30
  3868. )
  3869. if response.status_code != 200:
  3870. print(f" ⚠️ AI API 返回错误 {response.status_code},使用规则回复")
  3871. return self._generate_reply(chat_history)
  3872. result = response.json()
  3873. ai_reply = result.get('choices', [{}])[0].get('message', {}).get('content', '').strip()
  3874. if ai_reply:
  3875. print(f" ✅ AI 生成回复: {ai_reply}")
  3876. return ai_reply
  3877. else:
  3878. print(" ⚠️ AI 返回空内容,使用规则回复")
  3879. return self._generate_reply(chat_history)
  3880. except Exception as e:
  3881. print(f" ⚠️ AI 回复生成失败: {e},使用规则回复")
  3882. return self._generate_reply(chat_history)
  3883. def _generate_reply(self, chat_history: list) -> str:
  3884. """根据完整聊天历史生成回复(规则回复方式)"""
  3885. if not chat_history:
  3886. return "你好!感谢联系~"
  3887. # 检查最后一条是否是作者发的
  3888. if chat_history[-1]["is_author"]:
  3889. return "" # 不回复
  3890. # 找最后一条用户消息
  3891. last_user_msg = chat_history[-1]["content"]
  3892. # 简单规则回复
  3893. if "谢谢" in last_user_msg or "感谢" in last_user_msg:
  3894. return "不客气!欢迎常来交流~"
  3895. elif "你好" in last_user_msg or "在吗" in last_user_msg:
  3896. return "你好!请问有什么可以帮您的?"
  3897. elif "视频" in last_user_msg or "怎么拍" in last_user_msg:
  3898. return "视频是用手机拍摄的,注意光线和稳定哦!"
  3899. else:
  3900. return "收到!我会认真阅读您的留言~"
  3901. ================================================================================
  3902. 文件: server\python\platforms\xiaohongshu.py
  3903. ================================================================================
  3904. # -*- coding: utf-8 -*-
  3905. """
  3906. 小红书视频发布器
  3907. 参考: matrix/xhs_uploader/main.py
  3908. 使用 xhs SDK API 方式发布,更稳定
  3909. """
  3910. import asyncio
  3911. import os
  3912. import sys
  3913. import time
  3914. import concurrent.futures
  3915. from pathlib import Path
  3916. from typing import List
  3917. from .base import (
  3918. BasePublisher, PublishParams, PublishResult,
  3919. WorkItem, WorksResult, CommentItem, CommentsResult
  3920. )
  3921. from playwright.async_api import async_playwright
  stored_cookies = None
  # Put the "matrix" project directory (four levels above this file) on
  # sys.path so that its signing scripts can be imported.
  MATRIX_PATH = Path(__file__).parent.parent.parent.parent / "matrix"
  sys.path.insert(0, str(MATRIX_PATH))
  # Try to import the xhs SDK; XHS_SDK_AVAILABLE records whether it succeeded
  # and XhsClient is None when the package is missing.
  try:
      from xhs import XhsClient
      XHS_SDK_AVAILABLE = True
  except ImportError:
      print("[Warning] xhs 库未安装,请运行: pip install xhs")
      XhsClient = None
      XHS_SDK_AVAILABLE = False
  # Path of the stealth script injected into the signing browser context
  STEALTH_JS_PATH = MATRIX_PATH / "xhs-api" / "js" / "stealth.min.js"
  # Single-worker pool used by sign_sync to run the async signer off the event loop thread
  _xhs_sign_executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
  3937. class XiaohongshuPublisher(BasePublisher):
  3938. """
  3939. 小红书视频发布器
  3940. 优先使用 xhs SDK API 方式发布
  3941. """
  3942. platform_name = "xiaohongshu"
  3943. login_url = "https://creator.xiaohongshu.com/"
  3944. publish_url = "https://creator.xiaohongshu.com/publish/publish"
  3945. cookie_domain = ".xiaohongshu.com"
  async def get_sign(self, uri: str, data=None, a1: str = "", web_session: str = ""):
      """
      Compute the Xiaohongshu API signature with a headless browser.

      A Chromium page is opened on www.xiaohongshu.com (with the stealth script
      injected when it exists on disk), the ``a1`` cookie is set when given, and
      the page's ``window._webmsxyw(url, data)`` function is evaluated.

      Args:
          uri: API path to sign.
          data: Request payload passed to the signing function.
          a1: Value of the ``a1`` cookie; skipped when empty.
          web_session: Accepted for the signer callback signature; not used here.

      Returns:
          dict: ``{"x-s": ..., "x-t": ...}`` taken from the signing result.

      Raises:
          Exception: "签名失败: ..." wrapping (and chained to) any underlying error.
      """
      import traceback
      from playwright.async_api import async_playwright

      try:
          async with async_playwright() as playwright:
              browser = await playwright.chromium.launch(headless=True)
              try:
                  browser_context = await browser.new_context()
                  if STEALTH_JS_PATH.exists():
                      await browser_context.add_init_script(path=str(STEALTH_JS_PATH))
                  page = await browser_context.new_page()
                  await page.goto("https://www.xiaohongshu.com")
                  await asyncio.sleep(1)
                  await page.reload()
                  await asyncio.sleep(1)
                  if a1:
                      await browser_context.add_cookies([
                          {'name': 'a1', 'value': a1, 'domain': ".xiaohongshu.com", 'path': "/"}
                      ])
                      await page.reload()
                      await asyncio.sleep(0.5)
                  encrypt_params = await page.evaluate(
                      "([url, data]) => window._webmsxyw(url, data)",
                      [uri, data]
                  )
              finally:
                  # Close the browser (and its contexts) on failure paths as well
                  await browser.close()
          return {
              "x-s": encrypt_params["X-s"],
              "x-t": str(encrypt_params["X-t"])
          }
      except Exception as e:
          traceback.print_exc()
          # Chain the cause so the original traceback stays attached
          raise Exception(f"签名失败: {e}") from e
  def sign_sync(self, uri, data=None, a1="", web_session=""):
      """
      Synchronous signing callback for XhsClient.

      XhsClient calls the signer synchronously, while the signature itself is
      produced by the async ``get_sign``. When the calling thread already runs
      an event loop, ``asyncio.run`` cannot be used there, so the signing
      coroutine is executed via ``asyncio.run`` on the shared signing executor
      thread and this call blocks (up to 120 s) for its result. Without a
      running loop, ``asyncio.run`` is called directly.

      Returns:
          Whatever ``get_sign`` returns.
      """
      def run_async_sign():
          return asyncio.run(self.get_sign(uri, data=data, a1=a1, web_session=web_session))

      # Only the loop probe is guarded: a RuntimeError raised by the signing
      # itself must propagate instead of being mistaken for "no running loop"
      # (which would call asyncio.run inside a running loop).
      try:
          asyncio.get_running_loop()
      except RuntimeError:
          return run_async_sign()

      future = _xhs_sign_executor.submit(run_async_sign)
      return future.result(timeout=120)
  async def publish_via_api(self, cookies: str, params: PublishParams) -> PublishResult:
      """
      Publish a video through the xhs SDK (``XhsClient.create_video_note``).

      If the SDK reports missing login information, the cookies are refreshed
      once through the browser and the call is retried with the refreshed
      cookie string.

      Args:
          cookies: Login cookies in a format accepted by ``self.parse_cookies``.
          params: Publish parameters (title, description, tags, publish date,
              video path, cover path).

      Returns:
          PublishResult: ``success=True`` with the note id and URL, or a failed
          result flagged ``need_captcha`` / ``captcha_type='login'`` when the
          login could not be refreshed.

      Raises:
          Exception: When the SDK is not installed, the SDK call fails, or the
              SDK returns an empty or error result.
      """
      import traceback

      if not XHS_SDK_AVAILABLE:
          raise Exception("xhs SDK 未安装,请运行: pip install xhs")
      self.report_progress(10, "正在通过 API 发布...")
      print(f"[{self.platform_name}] 使用 XHS SDK API 发布...")
      print(f"[{self.platform_name}] 视频路径: {params.video_path}")
      print(f"[{self.platform_name}] 标题: {params.title}")

      # Convert the cookies to the "k=v; k=v" string the SDK expects
      cookie_list = self.parse_cookies(cookies)
      cookie_string = self.cookies_to_string(cookie_list) if cookie_list else cookies
      print(f"[{self.platform_name}] Cookie 长度: {len(cookie_string)}")
      self.report_progress(20, "正在上传视频...")

      async def ensure_valid_cookie_for_sdk() -> str | None:
          """Load the creator home page and return a fresh cookie string, or None when not logged in."""
          await self.init_browser()
          cookie_list_for_browser = self.parse_cookies(cookie_string)
          await self.set_cookies(cookie_list_for_browser)
          if not self.page or not self.context:
              return None
          await self.page.goto("https://creator.xiaohongshu.com/new/home", wait_until="domcontentloaded", timeout=60000)
          await asyncio.sleep(2)
          current_url = (self.page.url or '').lower()
          if 'login' in current_url or 'passport' in current_url:
              if self.headless:
                  # Nobody can log in manually in a headless browser
                  return None
              # Headed mode: poll for up to 180 s until the page has left the login URL
              waited = 0
              while waited < 180:
                  current_url = (self.page.url or '').lower()
                  if 'login' not in current_url and 'passport' not in current_url and 'creator.xiaohongshu.com' in current_url:
                      break
                  await asyncio.sleep(2)
                  waited += 2
          current_url = (self.page.url or '').lower()
          if 'login' in current_url or 'passport' in current_url:
              return None
          cookies_after = await self.context.cookies()
          try:
              await self.sync_cookies_to_node(cookies_after)
          except Exception as sync_err:
              # Best effort: a failed sync must not block publishing, but it is
              # logged instead of being silently discarded.
              print(f"[{self.platform_name}] 同步 Cookie 失败(已忽略): {sync_err}")
          refreshed_cookie_str = self.cookies_to_string(cookies_after)
          return refreshed_cookie_str or None

      def call_create_video_note(sdk_cookie_str: str):
          """Create the video note with a client built from the given cookie string."""
          xhs_client = XhsClient(sdk_cookie_str, sign=self.sign_sync)
          return xhs_client.create_video_note(
              title=params.title,
              desc=params.description or params.title,
              topics=params.tags or [],
              post_time=params.publish_date.strftime("%Y-%m-%d %H:%M:%S") if params.publish_date else None,
              video_path=params.video_path,
              cover_path=params.cover_path if params.cover_path and os.path.exists(params.cover_path) else None
          )

      print(f"[{self.platform_name}] 开始调用 create_video_note...")
      try:
          result = call_create_video_note(cookie_string)
          print(f"[{self.platform_name}] SDK 返回结果: {result}")
      except Exception as e:
          err_text = str(e)
          login_lost = '无登录信息' in err_text or '"code": -100' in err_text or "'code': -100" in err_text
          if not login_lost:
              traceback.print_exc()
              print(f"[{self.platform_name}] SDK 调用失败: {e}")
              raise Exception(f"XHS SDK 发布失败: {e}") from e

          self.report_progress(15, "登录信息失效,尝试刷新登录信息...")
          refreshed = await ensure_valid_cookie_for_sdk()
          if not refreshed:
              screenshot_base64 = await self.capture_screenshot()
              page_url = await self.get_page_url() if hasattr(self, 'get_page_url') else (self.page.url if self.page else "")
              return PublishResult(
                  success=False,
                  platform=self.platform_name,
                  error="登录已过期,请使用有头浏览器重新登录",
                  screenshot_base64=screenshot_base64,
                  page_url=page_url,
                  status='need_captcha',
                  need_captcha=True,
                  captcha_type='login'
              )
          try:
              result = call_create_video_note(refreshed)
              print(f"[{self.platform_name}] SDK 重试返回结果: {result}")
          except Exception as e2:
              traceback.print_exc()
              raise Exception(f"XHS SDK 发布失败: {e2}") from e2

      # Validate the SDK result
      if not result:
          raise Exception("XHS SDK 返回空结果")
      if isinstance(result, dict):
          if result.get("code") and result.get("code") != 0:
              raise Exception(f"发布失败: {result.get('msg', '未知错误')}")
          # Equality (not identity) is kept on purpose so that 0 also counts as failure
          if result.get("success") == False:
              raise Exception(f"发布失败: {result.get('msg', result.get('error', '未知错误'))}")

      note_id = result.get("note_id", "") if isinstance(result, dict) else ""
      video_url = result.get("url", "") if isinstance(result, dict) else ""
      if not note_id:
          print(f"[{self.platform_name}] 警告: 未获取到 note_id,返回结果: {result}")
      self.report_progress(100, "发布成功")
      print(f"[{self.platform_name}] 发布成功! note_id={note_id}, url={video_url}")
      return PublishResult(
          success=True,
          platform=self.platform_name,
          video_id=note_id,
          video_url=video_url,
          message="发布成功"
      )
  async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
      """
      Publish a video to Xiaohongshu.

      Order of attempts: with a proxy configured, go straight to the Playwright
      flow; otherwise try the xhs SDK API first (when installed) and fall back
      to the Playwright flow if the API attempt does not produce a final result.

      Raises:
          Exception: When the video file does not exist.
      """
      import traceback

      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 开始发布视频")
      print(f"[{self.platform_name}] 视频路径: {params.video_path}")
      print(f"[{self.platform_name}] 标题: {params.title}")
      print(f"[{self.platform_name}] Headless: {self.headless}")
      print(f"[{self.platform_name}] XHS SDK 可用: {XHS_SDK_AVAILABLE}")
      print(f"{'='*60}")

      # The video file must exist before anything else is attempted
      if not os.path.exists(params.video_path):
          raise Exception(f"视频文件不存在: {params.video_path}")
      print(f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")
      self.report_progress(5, "正在准备发布...")

      # A configured proxy server skips the SDK path entirely
      proxy_settings = getattr(self, 'proxy_config', None)
      if isinstance(proxy_settings, dict) and self.proxy_config.get('server'):
          print(f"[{self.platform_name}] 检测到代理配置,跳过 SDK 方式,使用 Playwright 走代理发布", flush=True)
          return await self.publish_via_playwright(cookies, params)

      # Preferred path: the xhs SDK API
      if XHS_SDK_AVAILABLE:
          try:
              print(f"[{self.platform_name}] 尝试使用 XHS SDK API 发布...")
              api_result = await self.publish_via_api(cookies, params)
              print(f"[{self.platform_name}] API 发布完成: success={api_result.success}")
              # A success, or a failure with a concrete error other than a
              # "请刷新" one, is final.
              if api_result.success or (api_result.error and "请刷新" not in api_result.error):
                  return api_result
              print(f"[{self.platform_name}] API 方式未成功,尝试 Playwright...")
          except Exception as e:
              failure_text = str(e)
              login_expired = '登录已过期' in failure_text or '无登录信息' in failure_text
              if login_expired:
                  print(f"[{self.platform_name}] API 登录失效,切换到 Playwright 方式...", flush=True)
              else:
                  traceback.print_exc()
                  print(f"[{self.platform_name}] API 发布失败: {e}")
                  print(f"[{self.platform_name}] 尝试使用 Playwright 方式...")

      # Fallback: publish through the browser
      print(f"[{self.platform_name}] 使用 Playwright 方式发布...")
      return await self.publish_via_playwright(cookies, params)
  async def publish_via_playwright(self, cookies: str, params: PublishParams) -> PublishResult:
      """Publish a video by driving the creator web page with Playwright.

      Steps performed, in order:
        1. Start the browser, install the parsed cookies and open the
           publish page.
        2. If the page URL contains "login"/"passport": in headed mode wait
           for a manual login (then sync cookies and reopen the page);
           otherwise return a ``need_captcha`` result of type ``login``.
        3. Ask ``ai_check_captcha`` whether a captcha is shown; in headed
           mode wait for it to be solved manually, otherwise return a
           ``need_captcha`` result.
        4. Upload the video through a file input, or through a file chooser
           opened by clicking the upload area.
        5. Poll (60 x 3 s) until the title input or editor + publish button
           appear, then fill in title (first 20 characters), description and
           up to 5 tags.
        6. Click the publish button (fixed selectors first, then an
           AI-suggested selector) and poll for up to 20 s for a success URL,
           a success toast or an error element.

      This method does not call ``close_browser`` itself.

      NOTE(review): the source listing carries no indentation; the nesting
      here is reconstructed from control flow. The placement of the 2 s sleep
      after the description block is an assumption - confirm against the
      original file.

      Args:
          cookies: Cookie string, parsed with ``self.parse_cookies``.
          params: Publish parameters; ``video_path``, ``title``,
              ``description`` and ``tags`` are read.

      Returns:
          A ``PublishResult``. Failure results carry a screenshot and the page
          URL, with ``status`` set to ``need_captcha``, ``need_action`` or
          ``failed``.

      Raises:
          Exception: "Page not initialized" when no page exists after browser
              start-up, or "未找到发布按钮" when no publish button could be
              clicked.
      """
      self.report_progress(10, "正在初始化浏览器...")
      print(f"[{self.platform_name}] Playwright 方式开始...")
      await self.init_browser()
      cookie_list = self.parse_cookies(cookies)
      print(f"[{self.platform_name}] 设置 {len(cookie_list)} 个 cookies")
      await self.set_cookies(cookie_list)
      if not self.page:
          raise Exception("Page not initialized")
      self.report_progress(15, "正在打开发布页面...")
      # Go straight to the video publish page.
      publish_url = "https://creator.xiaohongshu.com/publish/publish?source=official"
      print(f"[{self.platform_name}] 打开页面: {publish_url}")
      await self.page.goto(publish_url)
      await asyncio.sleep(3)
      current_url = self.page.url
      print(f"[{self.platform_name}] 当前 URL: {current_url}")

      async def wait_for_manual_login(timeout_seconds: int = 300) -> bool:
          """Poll every 2 s until the page URL is a non-login creator URL."""
          if not self.page:
              return False
          self.report_progress(12, "检测到需要登录,请在浏览器窗口完成登录...")
          try:
              await self.page.bring_to_front()
          except Exception:
              # Best effort only; the window may not support being raised.
              pass
          waited = 0
          while waited < timeout_seconds:
              try:
                  url = self.page.url
                  if "login" not in url and "passport" not in url and "creator.xiaohongshu.com" in url:
                      return True
                  await asyncio.sleep(2)
                  waited += 2
              except Exception:
                  # `except Exception` (not a bare except) so that task
                  # cancellation raised inside the sleep still propagates.
                  await asyncio.sleep(2)
                  waited += 2
          return False

      async def wait_for_manual_captcha(timeout_seconds: int = 180) -> bool:
          """Poll every 3 s until ``ai_check_captcha`` reports no captcha."""
          waited = 0
          while waited < timeout_seconds:
              try:
                  ai_captcha = await self.ai_check_captcha()
                  if not ai_captcha.get("has_captcha"):
                      return True
              except Exception:
                  # A failed check is treated as "still blocked"; cancellation
                  # is deliberately not swallowed.
                  pass
              await asyncio.sleep(3)
              waited += 3
          return False

      # Check the login state.
      if "login" in current_url or "passport" in current_url:
          if not self.headless:
              logged_in = await wait_for_manual_login()
              if logged_in:
                  try:
                      if self.context:
                          cookies_after = await self.context.cookies()
                          await self.sync_cookies_to_node(cookies_after)
                  except Exception:
                      # Cookie sync is best effort; publishing continues.
                      pass
                  await self.page.goto(publish_url)
                  await asyncio.sleep(3)
                  current_url = self.page.url
              else:
                  screenshot_base64 = await self.capture_screenshot()
                  return PublishResult(
                      success=False,
                      platform=self.platform_name,
                      error="需要登录:请在浏览器窗口完成登录后重试",
                      screenshot_base64=screenshot_base64,
                      page_url=current_url,
                      status='need_captcha',
                      need_captcha=True,
                      captcha_type='login'
                  )
          else:
              screenshot_base64 = await self.capture_screenshot()
              return PublishResult(
                  success=False,
                  platform=self.platform_name,
                  error="登录已过期,请重新登录",
                  screenshot_base64=screenshot_base64,
                  page_url=current_url,
                  status='need_captcha',
                  need_captcha=True,
                  captcha_type='login'
              )

      # Use the AI check to detect a captcha.
      ai_captcha = await self.ai_check_captcha()
      if ai_captcha['has_captcha']:
          print(f"[{self.platform_name}] AI检测到验证码: {ai_captcha['captcha_type']}", flush=True)
          if not self.headless:
              solved = await wait_for_manual_captcha()
              if solved:
                  try:
                      if self.context:
                          cookies_after = await self.context.cookies()
                          await self.sync_cookies_to_node(cookies_after)
                  except Exception:
                      # Cookie sync is best effort; publishing continues.
                      pass
              else:
                  screenshot_base64 = await self.capture_screenshot()
                  return PublishResult(
                      success=False,
                      platform=self.platform_name,
                      error="需要验证码:请在浏览器窗口完成验证后重试",
                      screenshot_base64=screenshot_base64,
                      page_url=current_url,
                      status='need_captcha',
                      need_captcha=True,
                      captcha_type=ai_captcha['captcha_type']
                  )
          else:
              screenshot_base64 = await self.capture_screenshot()
              return PublishResult(
                  success=False,
                  platform=self.platform_name,
                  error=f"检测到{ai_captcha['captcha_type']}验证码,需要使用有头浏览器完成验证",
                  screenshot_base64=screenshot_base64,
                  page_url=current_url,
                  status='need_captcha',
                  need_captcha=True,
                  captcha_type=ai_captcha['captcha_type']
              )

      self.report_progress(20, "正在上传视频...")
      # Give the page time to finish loading.
      await asyncio.sleep(2)

      # Upload the video.
      upload_triggered = False
      # Method 1: set the (possibly hidden) file input directly.
      print(f"[{self.platform_name}] 尝试方法1: 设置 file input")
      file_inputs = self.page.locator('input[type="file"]')
      input_count = await file_inputs.count()
      print(f"[{self.platform_name}] 找到 {input_count} 个 file input")
      if input_count > 0:
          # Pick the first input that accepts video (or anything).
          for i in range(input_count):
              input_el = file_inputs.nth(i)
              accept = await input_el.get_attribute('accept') or ''
              print(f"[{self.platform_name}] Input {i} accept: {accept}")
              if 'video' in accept or '*' in accept or not accept:
                  await input_el.set_input_files(params.video_path)
                  upload_triggered = True
                  print(f"[{self.platform_name}] 视频文件已设置到 input {i}")
                  break

      # Method 2: click the upload area to open a file chooser.
      if not upload_triggered:
          print(f"[{self.platform_name}] 尝试方法2: 点击上传区域")
          try:
              upload_area = self.page.locator('[class*="upload-wrapper"], [class*="upload-area"], .upload-input').first
              if await upload_area.count() > 0:
                  async with self.page.expect_file_chooser(timeout=5000) as fc_info:
                      await upload_area.click()
                  file_chooser = await fc_info.value
                  await file_chooser.set_files(params.video_path)
                  upload_triggered = True
                  print(f"[{self.platform_name}] 通过点击上传区域上传成功")
          except Exception as e:
              print(f"[{self.platform_name}] 方法2失败: {e}")

      if not upload_triggered:
          screenshot_base64 = await self.capture_screenshot()
          page_url = await self.get_page_url()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error="无法上传视频文件",
              screenshot_base64=screenshot_base64,
              page_url=page_url,
              status='need_action'
          )

      self.report_progress(40, "等待视频上传完成...")
      print(f"[{self.platform_name}] 等待视频上传和处理...")
      # Wait for the upload to finish by watching for the edit form.
      upload_complete = False
      for i in range(60):  # 60 polls x 3 s = at most 3 minutes
          await asyncio.sleep(3)
          # The title input appears once the upload has completed.
          title_input_count = await self.page.locator('input[placeholder*="标题"], input[placeholder*="填写标题"]').count()
          # Alternatively look for the editor area ...
          editor_count = await self.page.locator('[class*="ql-editor"], [contenteditable="true"]').count()
          # ... together with a publish button.
          publish_btn_count = await self.page.locator('.publishBtn, button:has-text("发布")').count()
          print(f"[{self.platform_name}] 检测 {i+1}: 标题框={title_input_count}, 编辑器={editor_count}, 发布按钮={publish_btn_count}")
          if title_input_count > 0 or (editor_count > 0 and publish_btn_count > 0):
              upload_complete = True
              print(f"[{self.platform_name}] 视频上传完成!")
              break

      if not upload_complete:
          screenshot_base64 = await self.capture_screenshot()
          page_url = await self.get_page_url()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error="视频上传超时",
              screenshot_base64=screenshot_base64,
              page_url=page_url,
              status='need_action'
          )

      await asyncio.sleep(2)
      self.report_progress(60, "正在填写笔记信息...")
      print(f"[{self.platform_name}] 填写标题: {params.title[:20]}")
      # Fill in the title using the first selector that matches.
      title_filled = False
      title_selectors = [
          'input[placeholder*="标题"]',
          'input[placeholder*="填写标题"]',
          '[class*="title"] input',
          '.c-input_inner',
      ]
      for selector in title_selectors:
          title_input = self.page.locator(selector).first
          if await title_input.count() > 0:
              await title_input.click()
              await title_input.fill('')  # clear any existing text first
              await title_input.fill(params.title[:20])
              title_filled = True
              print(f"[{self.platform_name}] 标题已填写,使用选择器: {selector}")
              break
      if not title_filled:
          print(f"[{self.platform_name}] 警告: 未找到标题输入框")

      # Fill in the description and tags.
      if params.description or params.tags:
          desc_filled = False
          desc_selectors = [
              '[class*="ql-editor"]',
              '[class*="content-input"] [contenteditable="true"]',
              '[class*="editor"] [contenteditable="true"]',
              '.ql-editor',
          ]
          for selector in desc_selectors:
              desc_input = self.page.locator(selector).first
              if await desc_input.count() > 0:
                  await desc_input.click()
                  await asyncio.sleep(0.5)
                  if params.description:
                      await self.page.keyboard.type(params.description, delay=20)
                      print(f"[{self.platform_name}] 描述已填写")
                  if params.tags:
                      # Tags go on a new line, each typed as "#tag" + space.
                      await self.page.keyboard.press("Enter")
                      for tag in params.tags[:5]:  # at most 5 tags
                          await self.page.keyboard.type(f"#{tag}", delay=20)
                          await asyncio.sleep(0.3)
                          await self.page.keyboard.press("Space")
                      print(f"[{self.platform_name}] 标签已填写: {params.tags[:5]}")
                  desc_filled = True
                  break
          if not desc_filled:
              print(f"[{self.platform_name}] 警告: 未找到描述输入框")
      await asyncio.sleep(2)

      self.report_progress(80, "正在发布...")
      await asyncio.sleep(2)
      # Scroll to the bottom so the publish button is in view.
      await self.page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
      await asyncio.sleep(1)
      print(f"[{self.platform_name}] 查找发布按钮...")
      # Click publish.
      publish_selectors = [
          'button.publishBtn',
          '.publishBtn',
          'button.d-button.red',
          'button:has-text("发布"):not(:has-text("定时发布"))',
          '[class*="publish"][class*="btn"]',
      ]
      publish_clicked = False
      for selector in publish_selectors:
          try:
              btn = self.page.locator(selector).first
              if await btn.count() > 0:
                  is_visible = await btn.is_visible()
                  is_enabled = await btn.is_enabled()
                  print(f"[{self.platform_name}] 按钮 {selector}: visible={is_visible}, enabled={is_enabled}")
                  if is_visible and is_enabled:
                      box = await btn.bounding_box()
                      if box:
                          print(f"[{self.platform_name}] 点击发布按钮: {selector}, 位置: ({box['x']}, {box['y']})")
                          # Click the button centre with the real mouse.
                          await self.page.mouse.click(box['x'] + box['width']/2, box['y'] + box['height']/2)
                          publish_clicked = True
                          break
          except Exception as e:
              print(f"[{self.platform_name}] 选择器 {selector} 错误: {e}")

      if not publish_clicked:
          # Fallback: ask the AI helper for a selector.
          try:
              suggest = await self.ai_suggest_playwright_selector("点击小红书发布按钮")
              if suggest.get("has_selector") and suggest.get("selector"):
                  sel = suggest.get("selector")
                  btn = self.page.locator(sel).first
                  if await btn.count() > 0 and await btn.is_visible() and await btn.is_enabled():
                      try:
                          await btn.click()
                      except Exception:
                          # Locator click failed; fall back to a mouse click.
                          box = await btn.bounding_box()
                          if box:
                              await self.page.mouse.click(box['x'] + box['width']/2, box['y'] + box['height']/2)
                      publish_clicked = True
          except Exception as e:
              print(f"[{self.platform_name}] AI 点击发布按钮失败: {e}", flush=True)

      if not publish_clicked:
          # Save a screenshot for debugging.
          screenshot_path = f"debug_publish_failed_{self.platform_name}.png"
          await self.page.screenshot(path=screenshot_path, full_page=True)
          print(f"[{self.platform_name}] 未找到发布按钮,截图保存到: {screenshot_path}")
          # Dump the first buttons on the page for debugging.
          buttons = await self.page.query_selector_all('button')
          print(f"[{self.platform_name}] 页面上共有 {len(buttons)} 个按钮")
          for i, btn in enumerate(buttons[:10]):
              text = await btn.text_content() or ''
              cls = await btn.get_attribute('class') or ''
              print(f" 按钮 {i}: text='{text.strip()[:30]}', class='{cls[:50]}'")
          raise Exception("未找到发布按钮")

      print(f"[{self.platform_name}] 已点击发布按钮,等待发布完成...")
      self.report_progress(90, "等待发布结果...")
      # Wait for the outcome: a URL change, a success toast or an error.
      publish_success = False
      refresh_retry = 0
      for i in range(20):  # at most 20 one-second polls
          await asyncio.sleep(1)
          current_url = self.page.url
          # Redirect to a success / content-management page?
          if "published=true" in current_url or "success" in current_url or "content" in current_url:
              publish_success = True
              print(f"[{self.platform_name}] 发布成功! 跳转到: {current_url}")
              break
          # Success toast?
          try:
              success_msg = await self.page.locator('[class*="success"], .toast-success, [class*="Toast"]').first.is_visible()
              if success_msg:
                  publish_success = True
                  print(f"[{self.platform_name}] 检测到成功提示!")
                  break
          except Exception:
              pass
          # Error message?
          try:
              error_elements = self.page.locator('[class*="error"], .toast-error, [class*="fail"]')
              if await error_elements.count() > 0:
                  first_error = error_elements.first
                  if await first_error.is_visible():
                      error_text = (await first_error.text_content()) or ''
                      error_text = error_text.strip()
                      if error_text:
                          if '请刷新' in error_text and refresh_retry < 3:
                              # "Please refresh" is treated as transient:
                              # reload and click publish again, up to 3 times.
                              refresh_retry += 1
                              print(f"[{self.platform_name}] 检测到临时错误: {error_text},尝试刷新并重试发布({refresh_retry}/3)", flush=True)
                              try:
                                  await self.page.reload(wait_until="domcontentloaded")
                              except Exception:
                                  pass
                              await asyncio.sleep(2)
                              await self.page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                              await asyncio.sleep(1)
                              for selector in publish_selectors:
                                  try:
                                      btn = self.page.locator(selector).first
                                      if await btn.count() > 0 and await btn.is_visible() and await btn.is_enabled():
                                          try:
                                              await btn.click()
                                          except Exception:
                                              box = await btn.bounding_box()
                                              if box:
                                                  await self.page.mouse.click(box['x'] + box['width']/2, box['y'] + box['height']/2)
                                          break
                                  except Exception:
                                      continue
                              continue
                          screenshot_base64 = await self.capture_screenshot()
                          page_url = await self.get_page_url()
                          return PublishResult(
                              success=False,
                              platform=self.platform_name,
                              error=f"发布失败: {error_text}",
                              screenshot_base64=screenshot_base64,
                              page_url=page_url,
                              status='failed'
                          )
          except Exception as e:
              # Only errors that already describe a publish failure propagate.
              if "发布失败" in str(e):
                  raise

      # Without a clear success signal, return a screenshot for AI analysis.
      if not publish_success:
          final_url = self.page.url
          print(f"[{self.platform_name}] 发布结果不确定,当前 URL: {final_url}")
          screenshot_base64 = await self.capture_screenshot()
          print(f"[{self.platform_name}] 已获取截图供 AI 分析")
          # Still on the publish page: further action may be required.
          if "publish/publish" in final_url:
              return PublishResult(
                  success=False,
                  platform=self.platform_name,
                  error="发布结果待确认,请查看截图",
                  screenshot_base64=screenshot_base64,
                  page_url=final_url,
                  status='need_action'
              )

      self.report_progress(100, "发布完成")
      print(f"[{self.platform_name}] Playwright 方式发布完成!")
      screenshot_base64 = await self.capture_screenshot()
      page_url = await self.get_page_url()
      return PublishResult(
          success=True,
          platform=self.platform_name,
          message="发布完成",
          screenshot_base64=screenshot_base64,
          page_url=page_url,
          status='success'
      )
  async def get_account_info(self, cookies: str) -> dict:
      """Read the account profile by capturing the creator home-page API call.

      A response listener watches for the ``personal_info`` API while the
      creator home page loads. If nothing is captured within 10 one-second
      polls, the page is reloaded once and polled again.

      NOTE(review): the source listing carries no indentation; the nesting
      here is reconstructed from control flow.

      Args:
          cookies: Cookie string, parsed with ``self.parse_cookies``.

      Returns:
          On success, ``{"success": True, "account_id", "account_name",
          "avatar_url", "fans_count", "works_count"}``; ``works_count`` is
          always 0 here. On any failure, ``{"success": False, "error": str}``.
          The browser is closed in both cases.
      """
      rule = '=' * 60
      print(f"\n{rule}")
      print(f"[{self.platform_name}] 获取账号信息")
      print(rule)

      profile = {}

      async def poll_for_profile(ticks=10):
          """Sleep in 1 s steps until a profile was captured or ticks run out."""
          remaining = ticks
          while remaining and not profile:
              await asyncio.sleep(1)
              remaining -= 1

      try:
          await self.init_browser()
          await self.set_cookies(self.parse_cookies(cookies))
          if not self.page:
              raise Exception("Page not initialized")

          # Listen for the personal-info API response.
          async def on_response(response):
              if 'api/galaxy/creator/home/personal_info' not in response.url:
                  return
              try:
                  payload = await response.json()
                  print(f"[{self.platform_name}] 捕获个人信息 API", flush=True)
                  if not (payload.get('success') or payload.get('code') == 0):
                      return
                  body = payload.get('data', {})
                  profile.update({
                      "account_id": f"xhs_{body.get('red_num', '')}",
                      "account_name": body.get('name', ''),
                      "avatar_url": body.get('avatar', ''),
                      "fans_count": body.get('fans_count', 0),
                      # An exact works count is not available from this API;
                      # it has to come from the works list.
                      "works_count": 0,
                  })
              except Exception as parse_exc:
                  print(f"[{self.platform_name}] 解析个人信息失败: {parse_exc}", flush=True)

          self.page.on('response', on_response)

          # Open the creator home page, which triggers the API call.
          print(f"[{self.platform_name}] 访问创作者首页...", flush=True)
          await self.page.goto("https://creator.xiaohongshu.com/new/home", wait_until="domcontentloaded")

          # Wait for the API response; reload once if it never arrived.
          await poll_for_profile()
          if not profile:
              print(f"[{self.platform_name}] 未捕获到个人信息,尝试刷新...", flush=True)
              await self.page.reload()
              await poll_for_profile()
          if not profile:
              raise Exception("无法获取账号信息")

          # The works count is left for get_works to update.
          outcome = {"success": True}
          outcome.update(profile)
          return outcome
      except Exception as exc:
          import traceback
          traceback.print_exc()
          return {"success": False, "error": str(exc)}
      finally:
          await self.close_browser()
  async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
      """Fetch one page of the account's notes through the creator note-list API.

      The note-manager page is opened so that the page's own signing function
      (``window._webmsxyw``) is available. The list endpoint is then called
      from inside the page with ``fetch`` and the resulting signature headers.

      Changes relative to the previous implementation:
        * The raised error message falls back to the ``message`` / ``error``
          keys, so a fetch failure no longer reads "...: None".
        * ``null`` values for ``video_info``, ``images_list`` entries and the
          probe response's ``data`` no longer abort the whole call.
        * A non-dict response is treated as a failed attempt.
        * No sleep is performed after the final failed attempt.

      NOTE(review): the source listing carries no indentation; the nesting
      here is reconstructed from control flow. The ``total`` fallbacks are
      assumed to sit outside the ``if tags`` branch - confirm against the
      original file.

      Args:
          cookies: Cookie string, parsed with ``self.parse_cookies``.
          page: Page number passed to the API as the ``page`` query parameter.
          page_size: Accepted for interface compatibility; not used here. The
              paging arithmetic uses a fixed size of 20.

      Returns:
          ``WorksResult`` with ``success=True`` plus works, total, has_more
          and next_page; or ``success=False`` and ``error`` when anything
          raised. The browser is closed in both cases.
      """
      print(f"\n{'='*60}", flush=True)
      print(f"[{self.platform_name}] 获取作品列表", flush=True)
      print(f"[{self.platform_name}] page={page}, page_size={page_size}", flush=True)
      print(f"{'='*60}", flush=True)
      works: List[WorkItem] = []
      total = 0
      has_more = False
      next_page = ""
      api_page_size = 20
      # In-page predicate: is the signing function present?
      sign_probe_js = """() => {
          return typeof window !== 'undefined' && typeof window._webmsxyw === 'function';
      }"""

      def is_ok(payload):
          """True when payload is a dict reporting success with non-empty data."""
          return (
              isinstance(payload, dict)
              and bool(payload.get('success') or payload.get('code') == 0)
              and bool(payload.get('data'))
          )

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          # Log the cookie count for debugging.
          print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies", flush=True)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          # Open the note-manager page.
          print(f"[{self.platform_name}] 访问笔记管理页面...", flush=True)
          try:
              await self.page.goto("https://creator.xiaohongshu.com/new/note-manager", wait_until="domcontentloaded", timeout=30000)
          except Exception as nav_error:
              print(f"[{self.platform_name}] 导航超时,但继续尝试: {nav_error}", flush=True)

          # Check the login state.
          current_url = self.page.url
          print(f"[{self.platform_name}] 当前页面: {current_url}", flush=True)
          if "login" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          # Wait for the page to load so the signing function can appear.
          print(f"[{self.platform_name}] 等待页面完全加载和签名函数初始化...", flush=True)
          await asyncio.sleep(3)

          # Poll for the signing function, once per second.
          max_sign_check_attempts = 10
          for sign_check_attempts in range(1, max_sign_check_attempts + 1):
              sign_available = await self.page.evaluate(sign_probe_js)
              if sign_available:
                  print(f"[{self.platform_name}] ✓ 签名函数 _webmsxyw 已可用", flush=True)
                  break
              print(f"[{self.platform_name}] ⏳ 等待签名函数... ({sign_check_attempts}/{max_sign_check_attempts})", flush=True)
              await asyncio.sleep(1)
          else:
              # Never became available; carry on and let the request decide.
              print(f"[{self.platform_name}] ⚠️ 警告: 签名函数 _webmsxyw 在 {max_sign_check_attempts} 次检查后仍不可用", flush=True)
              print(f"[{self.platform_name}] 继续尝试,但 API 调用可能会失败", flush=True)

          async def fetch_notes_page(p):
              """Call the note-list API for page ``p`` from inside the page."""
              # Re-check the signing function before every call.
              sign_available = await self.page.evaluate(sign_probe_js)
              if not sign_available:
                  print(f"[{self.platform_name}] ⚠️ 签名函数 _webmsxyw 不可用,等待...", flush=True)
                  await asyncio.sleep(2)
              return await self.page.evaluate(
                  """async (pageNum) => {
                      try {
                          // Endpoint: /api/galaxy/v2/creator/note/user/posted
                          const url = `/api/galaxy/v2/creator/note/user/posted?tab=0&page=${pageNum}`;
                          const headers = {
                              'Accept': 'application/json, text/plain, */*',
                              'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
                              'Referer': 'https://creator.xiaohongshu.com/new/note-manager',
                              'Sec-Fetch-Dest': 'empty',
                              'Sec-Fetch-Mode': 'cors',
                              'Sec-Fetch-Site': 'same-origin'
                          };
                          // Try to obtain the signature headers.
                          let signResult = { hasSign: false, x_s: '', x_t: '', x_s_common: '', error: '' };
                          if (typeof window !== 'undefined' && typeof window._webmsxyw === 'function') {
                              try {
                                  const sign = window._webmsxyw(url, '');
                                  headers['x-s'] = sign['X-s'];
                                  headers['x-t'] = String(sign['X-t']);
                                  // x-s-common is only set when provided.
                                  if (sign['X-s-common']) {
                                      headers['x-s-common'] = sign['X-s-common'];
                                  }
                                  signResult = {
                                      hasSign: true,
                                      x_s: sign['X-s'] ? sign['X-s'].substring(0, 50) + '...' : '',
                                      x_t: String(sign['X-t']),
                                      x_s_common: sign['X-s-common'] ? sign['X-s-common'].substring(0, 50) + '...' : '',
                                      error: ''
                                  };
                                  console.log('签名生成成功:', signResult);
                              } catch (e) {
                                  signResult.error = e.toString();
                                  console.error('签名生成失败:', e);
                              }
                          } else {
                              signResult.error = '_webmsxyw function not found';
                              console.error('签名函数不存在');
                          }
                          const res = await fetch(url, {
                              method: 'GET',
                              credentials: 'include',
                              headers
                          });
                          const responseData = await res.json();
                          return {
                              ...responseData,
                              _debug: {
                                  signResult: signResult,
                                  status: res.status,
                                  statusText: res.statusText
                              }
                          };
                      } catch (e) {
                          return { success: false, error: e.toString() };
                      }
                  }""",
                  p
              )

          def parse_notes(notes_list):
              """Convert raw note dicts into ``WorkItem`` objects; skip notes without id."""
              # Maps the API's tab_status to a status label; anything else is 'published'.
              status_by_tab = {0: 'draft', 2: 'reviewing', 3: 'rejected'}
              parsed = []
              for note in notes_list:
                  note_id = note.get('id', '')
                  if not note_id:
                      continue
                  cover_url = ''
                  # `or` guards: the keys may be present with a null value.
                  images_list = note.get('images_list') or []
                  if images_list:
                      cover_url = (images_list[0] or {}).get('url', '') or ''
                      if cover_url.startswith('http://'):
                          cover_url = cover_url.replace('http://', 'https://')
                  duration = (note.get('video_info') or {}).get('duration', 0)
                  status = status_by_tab.get(note.get('tab_status', 1), 'published')
                  video_url = f"https://www.xiaohongshu.com/explore/{note_id}" if note_id else ""
                  parsed.append(WorkItem(
                      work_id=note_id,
                      title=note.get('display_title', '') or '无标题',
                      cover_url=cover_url,
                      video_url=video_url,
                      duration=duration,
                      status=status,
                      publish_time=note.get('time', ''),
                      play_count=note.get('view_count', 0),
                      like_count=note.get('likes', 0),
                      comment_count=note.get('comments_count', 0),
                      share_count=note.get('shared_count', 0),
                      collect_count=note.get('collected_count', 0),
                  ))
              return parsed

          resp = None
          max_attempts = 3
          for attempt in range(1, max_attempts + 1):
              resp = await fetch_notes_page(page)
              # Print, then strip, the in-page debug information.
              if resp and isinstance(resp, dict) and resp.get('_debug'):
                  debug_info = resp.get('_debug', {})
                  sign_result = debug_info.get('signResult', {})
                  print(f"[{self.platform_name}] 🔍 调试信息: 签名可用: {sign_result.get('hasSign', False)}, X-S: {sign_result.get('x_s', '')}, X-T: {sign_result.get('x_t', '')}, X-S-Common: {sign_result.get('x_s_common', '')}, 签名错误: {sign_result.get('error', '')}, HTTP 状态: {debug_info.get('status', 'N/A')}", flush=True)
                  resp.pop('_debug', None)
              if is_ok(resp):
                  break
              print(f"[{self.platform_name}] 拉取作品列表失败,重试 {attempt}/3: {str(resp)[:200]}", flush=True)
              # Back off only when another attempt will follow.
              if attempt < max_attempts:
                  await asyncio.sleep(1.2 * attempt)

          if not is_ok(resp):
              if isinstance(resp, dict):
                  # The API reports under 'msg'/'message'; the in-page catch
                  # block reports under 'error'.
                  error_msg = resp.get('msg') or resp.get('message') or resp.get('error')
                  # Print the detailed error information.
                  if resp.get('msg'):
                      print(f"[{self.platform_name}] 错误消息: {resp.get('msg')}", flush=True)
                  if resp.get('message'):
                      print(f"[{self.platform_name}] 错误消息: {resp.get('message')}", flush=True)
                  if resp.get('error'):
                      print(f"[{self.platform_name}] 错误: {resp.get('error')}", flush=True)
              else:
                  error_msg = str(resp)
              raise Exception(f"无法获取作品列表数据: {error_msg}")

          data = resp.get('data', {}) or {}
          notes = data.get('notes', []) or []
          print(f"[{self.platform_name}] 第 {page} 页 notes 数量: {len(notes)}", flush=True)

          # Derive the total: prefer the 'special.note_time_desc' tag count,
          # then the largest tag count, then the total-style fields.
          tags = data.get('tags', []) or []
          if tags:
              preferred = 0
              for tag in tags:
                  if tag.get('id') == 'special.note_time_desc':
                      preferred = tag.get('notes_count', 0) or tag.get('notesCount', 0) or tag.get('count', 0) or 0
                      break
              if preferred:
                  total = preferred
              else:
                  total = max([int(t.get('notes_count', 0) or t.get('notesCount', 0) or t.get('count', 0) or 0) for t in tags] + [0])
          if not total:
              total = int(data.get('total', 0) or data.get('total_count', 0) or data.get('totalCount', 0) or 0)
          if not total and isinstance(data.get('page', {}), dict):
              total = int(data.get('page', {}).get('total', 0) or data.get('page', {}).get('totalCount', 0) or 0)

          next_page = data.get('page', "")
          if next_page == page:
              next_page = page + 1
          works.extend(parse_notes(notes))

          if total:
              has_more = (page * api_page_size + len(notes)) < total
              if has_more and (next_page == -1 or str(next_page) == "-1" or next_page == "" or next_page is None):
                  next_page = page + 1
          else:
              if len(notes) == 0:
                  has_more = False
              else:
                  # Total unknown: probe the next page to see whether it has notes.
                  next_resp = await fetch_notes_page(page + 1)
                  next_data = (next_resp.get('data') or {}) if isinstance(next_resp, dict) else {}
                  next_notes = next_data.get('notes', []) or []
                  has_more = len(next_notes) > 0
                  next_page = next_data.get('page', next_page)
      except Exception as e:
          import traceback
          print(f"[{self.platform_name}] 发生异常: {e}", flush=True)
          traceback.print_exc()
          return WorksResult(
              success=False,
              platform=self.platform_name,
              error=str(e)
          )
      finally:
          # Always close the browser.
          await self.close_browser()

      return WorksResult(
          success=True,
          platform=self.platform_name,
          works=works,
          total=total or (page * api_page_size + len(works)),
          has_more=has_more,
          next_page=next_page
      )
  4852. async def get_all_works(self, cookies: str) -> WorksResult:
  4853. """获取小红书全部作品(单次请求内自动翻页抓全量,避免 Node 侧分页不一致)"""
  4854. print(f"\n{'='*60}", flush=True)
  4855. print(f"[{self.platform_name}] 获取全部作品(auto paging)", flush=True)
  4856. print(f"{'='*60}", flush=True)
  4857. works: List[WorkItem] = []
  4858. total = 0
  4859. seen_ids = set()
  4860. cursor: object = 0
  4861. max_iters = 800
  4862. api_page_size = 20
  4863. try:
  4864. await self.init_browser()
  4865. cookie_list = self.parse_cookies(cookies)
  4866. print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies", flush=True)
  4867. await self.set_cookies(cookie_list)
  4868. if not self.page:
  4869. raise Exception("Page not initialized")
  4870. print(f"[{self.platform_name}] 访问笔记管理页面...", flush=True)
  4871. try:
  4872. await self.page.goto("https://creator.xiaohongshu.com/new/note-manager", wait_until="domcontentloaded", timeout=60000)
  4873. print(f"[{self.platform_name}] 页面加载成功", flush=True)
  4874. except Exception as nav_error:
  4875. print(f"[{self.platform_name}] 导航超时,但继续尝试: {nav_error}", flush=True)
  4876. # 即使超时也检查当前页面状态
  4877. try:
  4878. await asyncio.sleep(2)
  4879. current_url = self.page.url
  4880. print(f"[{self.platform_name}] 超时后当前页面: {current_url}", flush=True)
  4881. except Exception as e:
  4882. print(f"[{self.platform_name}] 检查页面状态时出错: {e}", flush=True)
  4883. current_url = self.page.url
  4884. print(f"[{self.platform_name}] 当前页面: {current_url}", flush=True)
  4885. if "login" in current_url:
  4886. raise Exception("Cookie 已过期,请重新登录")
  4887. # 等待页面完全加载,确保签名函数可用
  4888. print(f"[{self.platform_name}] 等待页面完全加载和签名函数初始化...", flush=True)
  4889. await asyncio.sleep(3)
  4890. # 检查签名函数是否可用
  4891. sign_check_attempts = 0
  4892. max_sign_check_attempts = 10
  4893. while sign_check_attempts < max_sign_check_attempts:
  4894. sign_available = await self.page.evaluate("""() => {
  4895. return typeof window !== 'undefined' && typeof window._webmsxyw === 'function';
  4896. }""")
  4897. if sign_available:
  4898. print(f"[{self.platform_name}] ✓ 签名函数 _webmsxyw 已可用", flush=True)
  4899. break
  4900. sign_check_attempts += 1
  4901. print(f"[{self.platform_name}] ⏳ 等待签名函数... ({sign_check_attempts}/{max_sign_check_attempts})", flush=True)
  4902. await asyncio.sleep(1)
  4903. if sign_check_attempts >= max_sign_check_attempts:
  4904. print(f"[{self.platform_name}] ⚠️ 警告: 签名函数 _webmsxyw 在 {max_sign_check_attempts} 次检查后仍不可用", flush=True)
  4905. print(f"[{self.platform_name}] 继续尝试,但 API 调用可能会失败", flush=True)
  4906. async def fetch_notes_page(p):
  4907. # 再次检查签名函数(每次调用前都检查)
  4908. sign_available = await self.page.evaluate("""() => {
  4909. return typeof window !== 'undefined' && typeof window._webmsxyw === 'function';
  4910. }""")
  4911. if not sign_available:
  4912. print(f"[{self.platform_name}] ⚠️ 签名函数 _webmsxyw 不可用,等待...", flush=True)
  4913. await asyncio.sleep(2)
  4914. return await self.page.evaluate(
  4915. """async (pageNum) => {
  4916. try {
  4917. // 使用正确的 API 端点:/api/galaxy/v2/creator/note/user/posted
  4918. const url = `/api/galaxy/v2/creator/note/user/posted?tab=0&page=${pageNum}`;
  4919. const headers = {
  4920. 'Accept': 'application/json, text/plain, */*',
  4921. 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
  4922. 'Referer': 'https://creator.xiaohongshu.com/new/note-manager',
  4923. 'Sec-Fetch-Dest': 'empty',
  4924. 'Sec-Fetch-Mode': 'cors',
  4925. 'Sec-Fetch-Site': 'same-origin'
  4926. };
  4927. // 尝试获取签名
  4928. let signResult = { hasSign: false, x_s: '', x_t: '', x_s_common: '', error: '' };
  4929. if (typeof window !== 'undefined' && typeof window._webmsxyw === 'function') {
  4930. try {
  4931. const sign = window._webmsxyw(url, '');
  4932. headers['x-s'] = sign['X-s'];
  4933. headers['x-t'] = String(sign['X-t']);
  4934. // 检查是否有 x-s-common
  4935. if (sign['X-s-common']) {
  4936. headers['x-s-common'] = sign['X-s-common'];
  4937. }
  4938. signResult = {
  4939. hasSign: true,
  4940. x_s: sign['X-s'] ? sign['X-s'].substring(0, 50) + '...' : '',
  4941. x_t: String(sign['X-t']),
  4942. x_s_common: sign['X-s-common'] ? sign['X-s-common'].substring(0, 50) + '...' : '',
  4943. error: ''
  4944. };
  4945. console.log('签名生成成功:', signResult);
  4946. } catch (e) {
  4947. signResult.error = e.toString();
  4948. console.error('签名生成失败:', e);
  4949. }
  4950. } else {
  4951. signResult.error = '_webmsxyw function not found';
  4952. console.error('签名函数不存在');
  4953. }
  4954. const res = await fetch(url, {
  4955. method: 'GET',
  4956. credentials: 'include',
  4957. headers
  4958. });
  4959. const responseData = await res.json();
  4960. return {
  4961. ...responseData,
  4962. _debug: {
  4963. signResult: signResult,
  4964. status: res.status,
  4965. statusText: res.statusText
  4966. }
  4967. };
  4968. } catch (e) {
  4969. return { success: false, error: e.toString() };
  4970. }
  4971. }""",
  4972. p
  4973. )
  4974. def parse_notes(notes_list):
  4975. parsed = []
  4976. for note in notes_list:
  4977. note_id = note.get('id', '')
  4978. if not note_id:
  4979. continue
  4980. cover_url = ''
  4981. images_list = note.get('images_list', [])
  4982. if images_list:
  4983. cover_url = images_list[0].get('url', '')
  4984. if cover_url.startswith('http://'):
  4985. cover_url = cover_url.replace('http://', 'https://')
  4986. duration = note.get('video_info', {}).get('duration', 0)
  4987. status = 'published'
  4988. tab_status = note.get('tab_status', 1)
  4989. if tab_status == 0:
  4990. status = 'draft'
  4991. elif tab_status == 2:
  4992. status = 'reviewing'
  4993. elif tab_status == 3:
  4994. status = 'rejected'
  4995. video_url = f"https://www.xiaohongshu.com/explore/{note_id}" if note_id else ""
  4996. parsed.append(WorkItem(
  4997. work_id=note_id,
  4998. title=note.get('display_title', '') or '无标题',
  4999. cover_url=cover_url,
  5000. video_url=video_url,
  5001. duration=duration,
  5002. status=status,
  5003. publish_time=note.get('time', ''),
  5004. play_count=note.get('view_count', 0),
  5005. like_count=note.get('likes', 0),
  5006. comment_count=note.get('comments_count', 0),
  5007. share_count=note.get('shared_count', 0),
  5008. collect_count=note.get('collected_count', 0),
  5009. ))
  5010. return parsed
  5011. async def collect_by_scrolling() -> WorksResult:
  5012. print(f"[{self.platform_name}] 直连接口被拒绝,切换为滚动页面 + 监听 API 响应模式", flush=True)
  5013. captured: List[WorkItem] = []
  5014. captured_total = 0
  5015. captured_seen = set()
  5016. lock = asyncio.Lock()
  5017. async def handle_response(response):
  5018. nonlocal captured_total
  5019. url = response.url
  5020. if ("creator.xiaohongshu.com" not in url and "edith.xiaohongshu.com" not in url) or "creator/note/user/posted" not in url:
  5021. return
  5022. try:
  5023. json_data = await response.json()
  5024. except Exception:
  5025. return
  5026. if not isinstance(json_data, dict):
  5027. return
  5028. if not (json_data.get("success") or json_data.get("code") == 0) or not json_data.get("data"):
  5029. return
  5030. data = json_data.get("data", {}) or {}
  5031. notes = data.get("notes", []) or []
  5032. tags = data.get("tags", []) or []
  5033. declared = 0
  5034. if tags:
  5035. preferred = 0
  5036. for tag in tags:
  5037. if tag.get("id") == "special.note_time_desc":
  5038. preferred = tag.get("notes_count", 0) or tag.get("notesCount", 0) or tag.get("count", 0) or 0
  5039. break
  5040. if preferred:
  5041. declared = int(preferred)
  5042. else:
  5043. declared = max([int(t.get("notes_count", 0) or t.get("notesCount", 0) or t.get("count", 0) or 0) for t in tags] + [0])
  5044. if not declared:
  5045. declared = int(data.get("total", 0) or data.get("total_count", 0) or data.get("totalCount", 0) or 0)
  5046. if not declared and isinstance(data.get("page", {}), dict):
  5047. declared = int(data.get("page", {}).get("total", 0) or data.get("page", {}).get("totalCount", 0) or 0)
  5048. async with lock:
  5049. if declared:
  5050. captured_total = max(captured_total, declared)
  5051. parsed = parse_notes(notes)
  5052. new_count = 0
  5053. for w in parsed:
  5054. if w.work_id and w.work_id not in captured_seen:
  5055. captured_seen.add(w.work_id)
  5056. captured.append(w)
  5057. new_count += 1
  5058. if new_count > 0:
  5059. print(
  5060. f"[{self.platform_name}] 捕获 notes 响应: notes={len(notes)}, new={new_count}, total_now={len(captured)}, declared_total={captured_total}",
  5061. flush=True
  5062. )
  5063. self.page.on("response", handle_response)
  5064. try:
  5065. try:
  5066. # 使用更宽松的等待条件,避免超时
  5067. await self.page.goto("https://creator.xiaohongshu.com/new/note-manager", wait_until="domcontentloaded", timeout=90000)
  5068. print(f"[{self.platform_name}] 页面加载成功", flush=True)
  5069. except Exception as nav_error:
  5070. print(f"[{self.platform_name}] 导航异常(继续):{nav_error}", flush=True)
  5071. # 即使超时也继续尝试,可能页面已经部分加载
  5072. try:
  5073. await asyncio.sleep(3)
  5074. current_url = self.page.url
  5075. print(f"[{self.platform_name}] 超时后当前页面: {current_url}", flush=True)
  5076. if "login" in current_url:
  5077. raise Exception("Cookie 已过期,请重新登录")
  5078. except Exception as e:
  5079. if "Cookie" in str(e):
  5080. raise
  5081. print(f"[{self.platform_name}] 检查页面状态时出错: {e}", flush=True)
  5082. await asyncio.sleep(2.0)
  5083. idle_rounds = 0
  5084. last_count = 0
  5085. last_height = 0
  5086. for _ in range(1, 400):
  5087. scroll_state = await self.page.evaluate(
  5088. """() => {
  5089. const isScrollable = (el) => {
  5090. if (!el) return false;
  5091. const style = window.getComputedStyle(el);
  5092. const oy = style.overflowY;
  5093. return (oy === 'auto' || oy === 'scroll') && (el.scrollHeight - el.clientHeight > 200);
  5094. };
  5095. const pickBest = () => {
  5096. const nodes = Array.from(document.querySelectorAll('*'));
  5097. let best = document.scrollingElement || document.documentElement || document.body;
  5098. let bestScroll = (best.scrollHeight || 0) - (best.clientHeight || 0);
  5099. for (const el of nodes) {
  5100. if (!isScrollable(el)) continue;
  5101. const diff = el.scrollHeight - el.clientHeight;
  5102. if (diff > bestScroll) {
  5103. best = el;
  5104. bestScroll = diff;
  5105. }
  5106. }
  5107. return best;
  5108. };
  5109. const el = pickBest();
  5110. const beforeTop = el.scrollTop || 0;
  5111. const beforeHeight = el.scrollHeight || 0;
  5112. el.scrollTo(0, beforeHeight);
  5113. return {
  5114. beforeTop,
  5115. afterTop: el.scrollTop || 0,
  5116. height: el.scrollHeight || 0,
  5117. client: el.clientHeight || 0,
  5118. };
  5119. }"""
  5120. )
  5121. await asyncio.sleep(1.2)
  5122. async with lock:
  5123. count_now = len(captured)
  5124. total_now = captured_total
  5125. if total_now and count_now >= total_now:
  5126. break
  5127. height_now = int(scroll_state.get("height", 0) or 0) if isinstance(scroll_state, dict) else 0
  5128. if count_now == last_count and height_now == last_height:
  5129. idle_rounds += 1
  5130. else:
  5131. idle_rounds = 0
  5132. last_count = count_now
  5133. last_height = height_now
  5134. if idle_rounds >= 6:
  5135. break
  5136. async with lock:
  5137. final_works = list(captured)
  5138. final_total = captured_total or len(final_works)
  5139. return WorksResult(
  5140. success=True,
  5141. platform=self.platform_name,
  5142. works=final_works,
  5143. total=final_total,
  5144. has_more=False,
  5145. next_page=-1
  5146. )
  5147. finally:
  5148. try:
  5149. self.page.remove_listener("response", handle_response)
  5150. except Exception:
  5151. pass
  5152. # 添加请求监听,捕获请求头信息
  5153. captured_requests = []
  5154. async def handle_request(request):
  5155. url = request.url
  5156. if ("creator.xiaohongshu.com" in url or "edith.xiaohongshu.com" in url) and "creator/note/user/posted" in url:
  5157. headers = request.headers
  5158. captured_requests.append({
  5159. "url": url,
  5160. "method": request.method,
  5161. "headers": dict(headers),
  5162. "timestamp": asyncio.get_event_loop().time()
  5163. })
  5164. # 打印关键头部信息
  5165. x_s = headers.get('x-s', '')
  5166. x_t = headers.get('x-t', '')
  5167. x_s_common = headers.get('x-s-common', '')
  5168. print(f"[{self.platform_name}] 📡 API 请求: {url}", flush=True)
  5169. print(f"[{self.platform_name}] Method: {request.method}", flush=True)
  5170. print(f"[{self.platform_name}] X-S: {x_s[:50] if x_s else '(none)'}...", flush=True)
  5171. print(f"[{self.platform_name}] X-T: {x_t}", flush=True)
  5172. print(f"[{self.platform_name}] X-S-Common: {x_s_common[:50] if x_s_common else '(none)'}...", flush=True)
  5173. print(f"[{self.platform_name}] Cookie: {headers.get('cookie', '')[:100]}...", flush=True)
  5174. self.page.on("request", handle_request)
  5175. iters = 0
  5176. page_count = 0 # 统计实际获取到的页数
  5177. print(f"[{self.platform_name}] ========== 开始自动分页获取作品 ==========", flush=True)
  5178. print(f"[{self.platform_name}] 最大迭代次数: {max_iters}, 每页大小: {api_page_size}", flush=True)
  5179. while iters < max_iters:
  5180. iters += 1
  5181. print(f"\n[{self.platform_name}] ---------- 第 {iters} 次请求 (cursor={cursor}) ----------", flush=True)
  5182. resp = await fetch_notes_page(cursor)
  5183. # 打印调试信息
  5184. if resp and isinstance(resp, dict) and resp.get('_debug'):
  5185. debug_info = resp.get('_debug', {})
  5186. sign_result = debug_info.get('signResult', {})
  5187. print(f"[{self.platform_name}] 🔍 调试信息:", flush=True)
  5188. print(f"[{self.platform_name}] 签名可用: {sign_result.get('hasSign', False)}", flush=True)
  5189. if sign_result.get('x_s'):
  5190. print(f"[{self.platform_name}] X-S: {sign_result.get('x_s', '')}", flush=True)
  5191. if sign_result.get('x_t'):
  5192. print(f"[{self.platform_name}] X-T: {sign_result.get('x_t', '')}", flush=True)
  5193. if sign_result.get('error'):
  5194. print(f"[{self.platform_name}] 签名错误: {sign_result.get('error', '')}", flush=True)
  5195. print(f"[{self.platform_name}] HTTP 状态: {debug_info.get('status', 'N/A')} {debug_info.get('statusText', '')}", flush=True)
  5196. # 移除调试信息,避免影响后续处理
  5197. resp.pop('_debug', None)
  5198. if not resp or not isinstance(resp, dict):
  5199. print(f"[{self.platform_name}] ❌ 第 {iters} 次拉取无响应,cursor={cursor}", flush=True)
  5200. print(f"[{self.platform_name}] 响应类型: {type(resp)}, 响应内容: {str(resp)[:500]}", flush=True)
  5201. break
  5202. if not (resp.get('success') or resp.get('code') == 0) or not resp.get('data'):
  5203. error_msg = str(resp)[:500]
  5204. print(f"[{self.platform_name}] ❌ 拉取失败 cursor={cursor}", flush=True)
  5205. print(f"[{self.platform_name}] 响应详情: {error_msg}", flush=True)
  5206. print(f"[{self.platform_name}] success={resp.get('success')}, code={resp.get('code')}, has_data={bool(resp.get('data'))}", flush=True)
  5207. # 打印详细的错误信息
  5208. if resp.get('msg'):
  5209. print(f"[{self.platform_name}] 错误消息: {resp.get('msg')}", flush=True)
  5210. if resp.get('message'):
  5211. print(f"[{self.platform_name}] 错误消息: {resp.get('message')}", flush=True)
  5212. if resp.get('error'):
  5213. print(f"[{self.platform_name}] 错误: {resp.get('error')}", flush=True)
  5214. # 打印调试信息
  5215. if resp.get('_debug'):
  5216. debug_info = resp.get('_debug', {})
  5217. print(f"[{self.platform_name}] HTTP 状态: {debug_info.get('status', 'N/A')} {debug_info.get('statusText', '')}", flush=True)
  5218. sign_result = debug_info.get('signResult', {})
  5219. if sign_result.get('error'):
  5220. print(f"[{self.platform_name}] 签名错误: {sign_result.get('error')}", flush=True)
  5221. if iters == 1:
  5222. print(f"[{self.platform_name}] 第一次请求失败,切换到滚动模式", flush=True)
  5223. return await collect_by_scrolling()
  5224. break
  5225. data = resp.get('data', {}) or {}
  5226. notes = data.get('notes', []) or []
  5227. if not notes:
  5228. print(f"[{self.platform_name}] ⚠️ cursor={cursor} 无作品,停止分页", flush=True)
  5229. break
  5230. # 统计页数
  5231. page_count += 1
  5232. print(f"[{self.platform_name}] ✅ 第 {page_count} 页获取成功,本页作品数: {len(notes)}", flush=True)
  5233. tags = data.get('tags', []) or []
  5234. if tags:
  5235. preferred = 0
  5236. for tag in tags:
  5237. if tag.get('id') == 'special.note_time_desc':
  5238. preferred = tag.get('notes_count', 0) or tag.get('notesCount', 0) or tag.get('count', 0) or 0
  5239. break
  5240. if preferred:
  5241. total = max(total, int(preferred))
  5242. print(f"[{self.platform_name}] 📊 从 tags 获取总数: {total} (preferred)", flush=True)
  5243. else:
  5244. tag_total = max([int(t.get('notes_count', 0) or t.get('notesCount', 0) or t.get('count', 0) or 0) for t in tags] + [0])
  5245. total = max(total, tag_total)
  5246. if tag_total > 0:
  5247. print(f"[{self.platform_name}] 📊 从 tags 获取总数: {total}", flush=True)
  5248. if not total:
  5249. t2 = int(data.get('total', 0) or data.get('total_count', 0) or data.get('totalCount', 0) or 0)
  5250. if not t2 and isinstance(data.get('page', {}), dict):
  5251. t2 = int(data.get('page', {}).get('total', 0) or data.get('page', {}).get('totalCount', 0) or 0)
  5252. total = max(total, t2)
  5253. if t2 > 0:
  5254. print(f"[{self.platform_name}] 📊 从 data.total 获取总数: {total}", flush=True)
  5255. parsed = parse_notes(notes)
  5256. new_items = []
  5257. for w in parsed:
  5258. if w.work_id and w.work_id not in seen_ids:
  5259. seen_ids.add(w.work_id)
  5260. new_items.append(w)
  5261. works.extend(new_items)
  5262. print(f"[{self.platform_name}] 📈 累计统计: 本页新作品={len(new_items)}, 累计作品数={len(works)}, 声明总数={total}", flush=True)
  5263. if total and len(works) >= total:
  5264. print(f"[{self.platform_name}] ✅ 已获取全部作品 (累计={len(works)} >= 总数={total}),停止分页", flush=True)
  5265. break
  5266. if len(new_items) == 0:
  5267. print(f"[{self.platform_name}] ⚠️ 本页无新作品,停止分页", flush=True)
  5268. break
  5269. next_page = data.get('page', "")
  5270. old_cursor = cursor
  5271. if next_page == cursor:
  5272. next_page = ""
  5273. if next_page == -1 or str(next_page) == "-1":
  5274. next_page = ""
  5275. if next_page is None or next_page == "":
  5276. if isinstance(cursor, int):
  5277. cursor = cursor + 1
  5278. else:
  5279. cursor = len(works) // api_page_size
  5280. print(f"[{self.platform_name}] 🔄 下一页 cursor: {old_cursor} -> {cursor} (自动递增)", flush=True)
  5281. else:
  5282. cursor = next_page
  5283. print(f"[{self.platform_name}] 🔄 下一页 cursor: {old_cursor} -> {cursor} (API返回)", flush=True)
  5284. await asyncio.sleep(0.5)
  5285. # 移除请求监听器
  5286. try:
  5287. self.page.remove_listener("request", handle_request)
  5288. except Exception:
  5289. pass
  5290. print(f"\n[{self.platform_name}] ========== 分页完成 ==========", flush=True)
  5291. print(f"[{self.platform_name}] 📊 分页统计: 总请求次数={iters}, 成功获取页数={page_count}, 累计作品数={len(works)}, 声明总数={total}", flush=True)
  5292. if captured_requests:
  5293. print(f"[{self.platform_name}] 📡 捕获到 {len(captured_requests)} 个 API 请求", flush=True)
  5294. for i, req in enumerate(captured_requests[:3], 1): # 只显示前3个
  5295. print(f"[{self.platform_name}] 请求 {i}: {req['method']} {req['url']}", flush=True)
  5296. if 'x-s' in req['headers']:
  5297. print(f"[{self.platform_name}] X-S: {req['headers']['x-s'][:50]}...", flush=True)
  5298. if 'x-t' in req['headers']:
  5299. print(f"[{self.platform_name}] X-T: {req['headers']['x-t']}", flush=True)
  5300. print(f"[{self.platform_name}] ========================================\n", flush=True)
  5301. except Exception as e:
  5302. import traceback
  5303. error_trace = traceback.format_exc()
  5304. print(f"[{self.platform_name}] 发生异常: {e}", flush=True)
  5305. traceback.print_exc()
  5306. return WorksResult(
  5307. success=False,
  5308. platform=self.platform_name,
  5309. error=str(e),
  5310. debug_info=f"异常详情: {error_trace[:500]}"
  5311. )
  5312. finally:
  5313. await self.close_browser()
  5314. debug_info = f"总请求次数={iters}, 成功获取页数={page_count}, 累计作品数={len(works)}, 声明总数={total}"
  5315. if len(works) == 0:
  5316. debug_info += " | 警告: 没有获取到任何作品,可能原因: Cookie失效、API调用失败、或账号无作品"
  5317. return WorksResult(
  5318. success=True,
  5319. platform=self.platform_name,
  5320. works=works,
  5321. total=total or len(works),
  5322. has_more=False,
  5323. next_page=-1,
  5324. debug_info=debug_info
  5325. )
  async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
      """
      Collect the comments of one note by replaying the profile-page flow.

      The method launches its own headed Chromium instance (it does not use
      ``self.page``), opens the profile page of the user whose id is stored in
      the ``x-user-id-creator.xiaohongshu.com`` cookie, scrolls until the page
      height stops changing, clicks the cover whose note id equals ``work_id``
      and parses the ``/api/sns/web/v2/comment/page`` response captured after
      that click.

      Args:
          cookies: Raw cookie payload accepted by ``self.parse_cookies``. Only
              the user id is read from it here; the browser context itself is
              seeded from the module-level ``stored_cookies`` (refreshed from
              ``cookies.json`` when that file exists).
          work_id: Id of the note whose comments are wanted.
          cursor: Accepted for interface compatibility; not used.

      Returns:
          A ``CommentsResult`` holding a flat list of top-level comments and
          the replies embedded in the same response. ``has_more`` is always
          False. ``success`` is False when the flow aborts with an exception.
      """
      global stored_cookies
      all_comments: List[CommentItem] = []
      has_more = False
      playwright = None
      browser = None
      print(work_id)
      try:
          # --- Step 1: start the browser and seed the context with cookies ---
          cookie_list = self.parse_cookies(cookies)
          playwright = await async_playwright().start()
          browser = await playwright.chromium.launch(headless=False)
          context = await browser.new_context(
              viewport={"width": 1400, "height": 900},
              user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
          )
          if os.path.exists("cookies.json"):
              with open("cookies.json", "r") as f:
                  stored_cookies = json.load(f)
          if stored_cookies:
              await context.add_cookies(stored_cookies)
          page = await context.new_page()

          # --- Step 2: open the Xiaohongshu home page ---
          await page.goto("https://www.xiaohongshu.com", wait_until="domcontentloaded")
          await asyncio.sleep(1.5)

          # --- Step 3: if a login dialog is shown, wait for it to disappear ---
          try:
              if await page.is_visible(".login-container", timeout=3000):
                  await page.wait_for_selector(".login-container", state="hidden", timeout=120000)
                  stored_cookies = await context.cookies()
                  # NOTE(review): cookies are loaded from "cookies.json" above
                  # but persisted to "xiaohongshu_cookies.json" here - confirm
                  # which file name is intended.
                  with open("xiaohongshu_cookies.json", "w") as f:
                      json.dump(stored_cookies, f)
          except Exception as login_error:
              # Best effort: a timeout here must not abort the whole flow.
              print(f"[{self.platform_name}] 登录弹窗检查失败(继续): {login_error}", flush=True)

          # --- Extract the user id from the supplied cookies ---
          user_id = None
          for cookie in cookie_list:
              if cookie.get('name') == 'x-user-id-creator.xiaohongshu.com':
                  user_id = cookie.get('value')
                  break
          if not user_id:
              raise ValueError("无法从 Cookie 中提取 user_id")

          # --- Step 4: open the user's profile page ---
          profile_url = f"https://www.xiaohongshu.com/user/profile/{user_id}"
          await page.goto(profile_url, wait_until="domcontentloaded")
          await asyncio.sleep(2)

          # --- Wait for the note grid to render ---
          try:
              await page.wait_for_selector("#userPostedFeeds .note-item", timeout=20000)
          except Exception as wait_error:
              # ``except Exception`` (not a bare except) so task cancellation
              # is not converted into a generic error.
              raise Exception("笔记区域未加载,请检查账号是否公开或 Cookie 是否有效") from wait_error

          # --- Step 5: scroll down until the document height stops changing ---
          last_height = None
          while True:
              await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
              await asyncio.sleep(2)
              new_height = await page.evaluate("document.body.scrollHeight")
              if new_height == last_height:
                  break
              last_height = new_height

          # --- Collect every cover image element ---
          note_imgs = await page.query_selector_all("#userPostedFeeds .note-item .cover img")
          print(f"共找到 {len(note_imgs)} 张封面图")

          # --- Step 6 & 7: click the matching cover and parse its comments ---
          for img in note_imgs:
              try:
                  # Resolve the note id from the card's /explore/<id> link and
                  # skip every card that is not the requested work.
                  note_id = await img.evaluate('''el => {
                      const item = el.closest('.note-item');
                      if (!item) return null;
                      const link = item.querySelector('a[href^="/explore/"]');
                      return link ? link.href.split('/').pop() : null;
                  }''')
                  if note_id != work_id:
                      print(f"note_id {note_id} 与目标 work_id {work_id} 不匹配,跳出循环")
                      continue

                  await img.scroll_into_view_if_needed()
                  await asyncio.sleep(0.5)
                  comment_resp = None

                  def handle_response(response):
                      nonlocal comment_resp
                      if "edith.xiaohongshu.com/api/sns/web/v2/comment/page" in response.url:
                          comment_resp = response

                  page.on("response", handle_response)
                  try:
                      await img.click()
                      await asyncio.sleep(1.5)
                  finally:
                      # Always detach, even if the click fails, so listeners
                      # do not pile up on the page.
                      page.remove_listener("response", handle_response)

                  if not comment_resp:
                      await page.keyboard.press("Escape")
                      continue
                  json_data = await comment_resp.json()
                  if not (json_data.get("success") or json_data.get("code") == 0):
                      await page.keyboard.press("Escape")
                      continue

                  data = json_data.get("data", {}) or {}
                  raw_comments = data.get("comments", []) or []
                  for main_cmt in raw_comments:
                      # Top-level comment
                      user_info = main_cmt.get("user_info", {})
                      all_comments.append(CommentItem(
                          comment_id=main_cmt["id"],
                          parent_comment_id=None,
                          work_id=work_id,
                          content=main_cmt["content"],
                          author_id=user_info.get("user_id", ""),
                          author_name=user_info.get("nickname", ""),
                          author_avatar=user_info.get("image", ""),
                          like_count=int(main_cmt.get("like_count", 0)),
                          reply_count=main_cmt.get("sub_comment_count", 0),
                          create_time=self._timestamp_to_readable(main_cmt.get("create_time", 0)),
                      ))
                      # Replies embedded in the same response
                      for sub_cmt in main_cmt.get("sub_comments", []):
                          sub_user = sub_cmt.get("user_info", {})
                          all_comments.append(CommentItem(
                              comment_id=sub_cmt["id"],
                              parent_comment_id=main_cmt["id"],
                              work_id=work_id,
                              content=sub_cmt["content"],
                              author_id=sub_user.get("user_id", ""),
                              author_name=sub_user.get("nickname", ""),
                              author_avatar=sub_user.get("image", ""),
                              like_count=int(sub_cmt.get("like_count", 0)),
                              reply_count=0,
                              create_time=self._timestamp_to_readable(sub_cmt.get("create_time", 0)),
                          ))

                  # Close the note dialog
                  await page.keyboard.press("Escape")
                  await asyncio.sleep(1)
                  # The requested note has been handled; the remaining covers
                  # cannot match, so stop scanning.
                  break
              except Exception as note_error:
                  print(f"[{self.platform_name}] 处理笔记评论时出错: {note_error}", flush=True)
                  # Still try to close the dialog before moving on.
                  try:
                      await page.keyboard.press("Escape")
                      await asyncio.sleep(0.5)
                  except Exception:
                      pass
                  continue

          # --- Build the result ---
          return CommentsResult(
              success=True,
              platform=self.platform_name,
              work_id=work_id,
              comments=all_comments,
              total=len(all_comments),
              has_more=has_more
          )
      except Exception:
          import traceback
          traceback.print_exc()
          # Report the failure instead of masking it as an empty success.
          return CommentsResult(
              success=False,
              platform=self.platform_name,
              work_id=work_id,
              total=0
          )
      finally:
          try:
              if browser:
                  await browser.close()
          finally:
              # The driver started with ``async_playwright().start()`` must be
              # stopped explicitly, otherwise its process is leaked.
              if playwright:
                  await playwright.stop()
  def _timestamp_to_readable(self, ts_ms: int) -> str:
      """Convert a millisecond Unix timestamp to ``YYYY-MM-DD HH:MM:SS``.

      The conversion uses ``datetime.fromtimestamp`` without a timezone, so
      the result is expressed in the local time of the running process.

      Args:
          ts_ms: Timestamp in milliseconds since the epoch.

      Returns:
          The formatted time, or an empty string when ``ts_ms`` is falsy
          (``0``, ``None``, ``""``) or cannot be converted.
      """
      from datetime import datetime
      if not ts_ms:
          return ""
      try:
          return datetime.fromtimestamp(ts_ms / 1000).strftime("%Y-%m-%d %H:%M:%S")
      except (TypeError, ValueError, OverflowError, OSError):
          # Non-numeric input or a value outside the platform's time range.
          return ""
  async def get_all_comments(self, cookies: str) -> dict:
      """Collect comments for all works via the creator comment-management page.

      The method opens ``https://creator.xiaohongshu.com/creator/comment`` in
      the publisher's browser, listens to every network response, keeps the
      payloads of comment-list and note-list API calls, scrolls the page a
      few times to trigger more requests and finally groups the captured
      comments by note id.

      Args:
          cookies: Raw cookie payload accepted by ``self.parse_cookies``.

      Returns:
          On success: ``{'success': True, 'platform', 'work_comments',
          'total'}`` where ``work_comments`` is a list of
          ``{'work_id', 'title', 'cover_url', 'comments'}`` dicts and
          ``total`` is the number of works in that list.
          On failure: ``{'success': False, 'platform', 'error',
          'work_comments': []}``.
      """
      import re

      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取所有作品评论")
      print(f"{'='*60}")
      all_work_comments = []
      captured_comments = []
      captured_notes = {}  # note_id -> {'title', 'cover'}
      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          # Response listener: harvests comment-list and note-list payloads.
          async def handle_response(response):
              url = response.url
              try:
                  # Comment list API (several URL shapes)
                  if '/comment/' in url and ('page' in url or 'list' in url):
                      json_data = await response.json()
                      print(f"[{self.platform_name}] 捕获到评论 API: {url[:100]}...", flush=True)
                      if json_data.get('success') or json_data.get('code') == 0:
                          # ``data`` may be present but null; normalise to a dict.
                          data = json_data.get('data', {}) or {}
                          comments = data.get('comments', []) or data.get('list', []) or []
                          # The note id is taken from the request URL.
                          note_id_match = re.search(r'note_id=([^&]+)', url)
                          note_id = note_id_match.group(1) if note_id_match else ''
                          if comments:
                              for comment in comments:
                                  # Tag each comment with its note id when absent.
                                  if note_id and 'note_id' not in comment:
                                      comment['note_id'] = note_id
                                  captured_comments.append(comment)
                              print(f"[{self.platform_name}] 捕获到 {len(comments)} 条评论 (note_id={note_id}),总计: {len(captured_comments)}", flush=True)
                  # Note list API
                  if '/note/' in url and ('list' in url or 'posted' in url or 'manager' in url):
                      json_data = await response.json()
                      if json_data.get('success') or json_data.get('code') == 0:
                          data = json_data.get('data', {}) or {}
                          notes = data.get('notes', []) or data.get('list', []) or []
                          print(f"[{self.platform_name}] 捕获到笔记列表 API: {len(notes)} 个笔记", flush=True)
                          for note in notes:
                              note_id = note.get('note_id', '') or note.get('id', '')
                              if note_id:
                                  cover_url = ''
                                  cover = note.get('cover', {})
                                  if isinstance(cover, dict):
                                      cover_url = cover.get('url', '') or cover.get('url_default', '')
                                  elif isinstance(cover, str):
                                      cover_url = cover
                                  captured_notes[note_id] = {
                                      'title': note.get('title', '') or note.get('display_title', ''),
                                      'cover': cover_url,
                                  }
              except Exception as e:
                  print(f"[{self.platform_name}] 解析响应失败: {e}", flush=True)

          self.page.on('response', handle_response)
          print(f"[{self.platform_name}] 已注册 API 响应监听器", flush=True)

          # Open the comment management page
          print(f"[{self.platform_name}] 访问评论管理页面...", flush=True)
          await self.page.goto("https://creator.xiaohongshu.com/creator/comment", wait_until="domcontentloaded", timeout=30000)
          await asyncio.sleep(5)

          # A redirect to a login URL means the cookies are no longer valid.
          current_url = self.page.url
          if "login" in current_url:
              raise Exception("Cookie 已过期,请重新登录")
          print(f"[{self.platform_name}] 页面加载完成,当前捕获: {len(captured_comments)} 条评论, {len(captured_notes)} 个笔记", flush=True)

          # Scroll to trigger loading of more comments
          for _ in range(5):
              await self.page.evaluate('window.scrollBy(0, 500)')
              await asyncio.sleep(1)
          await asyncio.sleep(3)

          # Detach the listener before post-processing
          self.page.remove_listener('response', handle_response)
          print(f"[{self.platform_name}] 最终捕获: {len(captured_comments)} 条评论, {len(captured_notes)} 个笔记", flush=True)

          # Group the captured comments by note id
          work_comments_map = {}  # note_id -> work entry
          for comment in captured_comments:
              if not isinstance(comment, dict):
                  # Unexpected payload shape; skip instead of aborting the run.
                  continue
              note_info = comment.get('note_info') or comment.get('note') or {}
              if not isinstance(note_info, dict):
                  note_info = {}
              note_id = comment.get('note_id', '') or note_info.get('note_id', '') or note_info.get('id', '')
              if not note_id:
                  continue
              if note_id not in work_comments_map:
                  saved_note = captured_notes.get(note_id, {})
                  cover_url = ''
                  cover = note_info.get('cover', {})
                  if isinstance(cover, dict):
                      cover_url = cover.get('url', '') or cover.get('url_default', '')
                  elif isinstance(cover, str):
                      cover_url = cover
                  if not cover_url:
                      # Fall back to the cover seen in the note-list response.
                      cover_url = saved_note.get('cover', '')
                  work_comments_map[note_id] = {
                      'work_id': note_id,
                      'title': note_info.get('title', '') or note_info.get('display_title', '') or saved_note.get('title', ''),
                      'cover_url': cover_url,
                      'comments': []
                  }
              cid = comment.get('id', '') or comment.get('comment_id', '')
              if not cid:
                  continue
              user_info = comment.get('user_info') or comment.get('user') or {}
              if not isinstance(user_info, dict):
                  user_info = {}
              work_comments_map[note_id]['comments'].append({
                  'comment_id': cid,
                  'author_id': user_info.get('user_id', '') or user_info.get('id', ''),
                  'author_name': user_info.get('nickname', '') or user_info.get('name', ''),
                  'author_avatar': user_info.get('image', '') or user_info.get('avatar', ''),
                  'content': comment.get('content', ''),
                  'like_count': comment.get('like_count', 0),
                  'create_time': comment.get('create_time', ''),
              })
          all_work_comments = list(work_comments_map.values())
          total_comments = sum(len(w['comments']) for w in all_work_comments)
          print(f"[{self.platform_name}] 获取到 {len(all_work_comments)} 个作品的 {total_comments} 条评论", flush=True)
      except Exception as e:
          import traceback
          traceback.print_exc()
          return {
              'success': False,
              'platform': self.platform_name,
              'error': str(e),
              'work_comments': []
          }
      finally:
          await self.close_browser()
      return {
          'success': True,
          'platform': self.platform_name,
          'work_comments': all_work_comments,
          'total': len(all_work_comments)
      }
  5641. ================================================================================
  5642. 文件: server\python\platforms\baijiahao.py
  5643. ================================================================================
  5644. # -*- coding: utf-8 -*-
  5645. """
  5646. 百家号视频发布器
  5647. """
  5648. import asyncio
  5649. import json
  5650. from typing import List
  5651. from datetime import datetime
  5652. from .base import (
  5653. BasePublisher, PublishParams, PublishResult,
  5654. WorkItem, WorksResult, CommentItem, CommentsResult
  5655. )
  5656. class BaijiahaoPublisher(BasePublisher):
  5657. """
  5658. 百家号视频发布器
  5659. 使用 Playwright 自动化操作百家号创作者中心
  5660. """
  5661. platform_name = "baijiahao"
  5662. login_url = "https://baijiahao.baidu.com/"
  5663. publish_url = "https://baijiahao.baidu.com/builder/rc/edit?type=video"
  5664. cookie_domain = ".baidu.com"
  5665. # 登录检测配置
  5666. login_check_url = "https://baijiahao.baidu.com/builder/rc/home"
  5667. login_indicators = ["passport.baidu.com", "/login", "wappass.baidu.com"]
  5668. login_selectors = ['text="登录"', 'text="请登录"', '[class*="login-btn"]']
  async def get_account_info(self, cookies: str) -> dict:
      """
      Fetch Baijiahao account information through direct HTTP API calls.

      No browser is started. An aiohttp session seeded with the supplied
      cookies performs the following requests:

      0. GET the creator-center home page before any API call.
      1. GET ``builder/app/appinfo`` for the account profile. errno 110 is
         reported as an expired login; errno 10001402 triggers one retry
         after re-visiting the home page.
      2. GET ``cms-ui/rights/growth/get_info`` for the fan count
         (best effort, failures leave the count at 0).
      3. GET ``pcui/article/lists`` for the work count (best effort, one
         retry on errno 10001402, failures leave the count at 0).

      Args:
          cookies: Cookie payload in the format ``self.parse_cookies``
              accepts; each parsed entry must provide ``name`` and ``value``.

      Returns:
          On success a dict with ``success=True``, ``account_id``,
          ``account_name``, ``avatar_url``, ``fans_count`` and
          ``works_count``. When the appinfo API reports an error or no user
          data, a dict with ``success=False``, ``error`` and
          ``need_login=True``. Any unexpected exception is caught, its
          traceback printed, and ``{"success": False, "error": str(e)}``
          is returned.
      """
      import aiohttp

      home_url = 'https://baijiahao.baidu.com/builder/rc/home'
      appinfo_url = 'https://baijiahao.baidu.com/builder/app/appinfo'

      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取账号信息 (使用 API)")
      print(f"{'='*60}")

      try:
          # Turn the parsed cookie entries into the name -> value mapping
          # aiohttp expects.
          cookie_list = self.parse_cookies(cookies)
          cookie_dict = {c['name']: c['value'] for c in cookie_list}

          # Important: the home page has to be visited first so that the
          # session context exists before the JSON APIs are called.
          print(f"[{self.platform_name}] 第一步:访问主页建立会话...")

          # Browser-like headers for the HTML page navigation.
          # The Cookie header is managed by the session, not set by hand.
          session_headers = {
              'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
              'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
              'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
              'Accept-Encoding': 'gzip, deflate, br',
              'Connection': 'keep-alive',
              'Upgrade-Insecure-Requests': '1',
              'Sec-Fetch-Dest': 'document',
              'Sec-Fetch-Mode': 'navigate',
              'Sec-Fetch-Site': 'none',
              'Sec-Fetch-User': '?1',
              'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
              'sec-ch-ua-mobile': '?0',
              'sec-ch-ua-platform': '"Windows"'
          }

          # XHR-like headers for the JSON API calls.
          # The Cookie header is managed by the session, not set by hand.
          headers = {
              'Accept': 'application/json, text/plain, */*',
              'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
              'Referer': 'https://baijiahao.baidu.com/builder/rc/home',
              'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
              'Accept-Encoding': 'gzip, deflate, br',
              'Connection': 'keep-alive',
              'Sec-Fetch-Dest': 'empty',
              'Sec-Fetch-Mode': 'cors',
              'Sec-Fetch-Site': 'same-origin',
              'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
              'sec-ch-ua-mobile': '?0',
              'sec-ch-ua-platform': '"Windows"'
          }

          default_timeout = aiohttp.ClientTimeout(total=30)

          # Seed the session with the cookies so aiohttp keeps the cookie jar
          # up to date across the requests below.
          async with aiohttp.ClientSession(cookies=cookie_dict) as session:
              # Step 0: visit the home page to establish session context.
              print(f"[{self.platform_name}] [0/4] 访问主页建立会话上下文...")
              async with session.get(
                  home_url,
                  headers=session_headers,
                  timeout=default_timeout
              ) as home_response:
                  home_status = home_response.status
                  print(f"[{self.platform_name}] 主页访问状态: {home_status}")
                  # Only report that new cookies arrived; the session's
                  # cookie jar is what actually stores them.
                  if 'Set-Cookie' in home_response.headers:
                      print(f"[{self.platform_name}] 获取到新的会话Cookie")

              # Short pause so the session is in place before the API call.
              await asyncio.sleep(1)

              # Step 1: basic account information.
              print(f"[{self.platform_name}] [1/4] 调用 appinfo API...")
              async with session.get(
                  appinfo_url,
                  headers=headers,
                  timeout=default_timeout
              ) as response:
                  appinfo_result = await response.json()

              print(f"[{self.platform_name}] appinfo API 完整响应: {json.dumps(appinfo_result, ensure_ascii=False)[:500]}")
              print(f"[{self.platform_name}] appinfo API 响应: errno={appinfo_result.get('errno')}")

              # Check the login state reported by the API.
              if appinfo_result.get('errno') != 0:
                  error_msg = appinfo_result.get('errmsg', '未知错误')
                  errno = appinfo_result.get('errno')
                  print(f"[{self.platform_name}] API 返回错误: errno={errno}, msg={error_msg}")

                  # errno 110 means "not logged in".
                  if errno == 110:
                      return {
                          "success": False,
                          "error": "Cookie 已失效,需要重新登录",
                          "need_login": True
                      }

                  # Any error other than the retryable one is final.
                  if errno != 10001402:
                      return {
                          "success": False,
                          "error": error_msg,
                          "need_login": True
                      }

                  # errno 10001402 (distributed-auth problem): re-visit the
                  # home page and retry the API once.
                  print(f"[{self.platform_name}] 检测到分散认证问题,尝试重新访问主页...")
                  await asyncio.sleep(2)

                  async with session.get(
                      home_url,
                      headers=session_headers,
                      timeout=default_timeout
                  ) as retry_home_response:
                      print(f"[{self.platform_name}] 重新访问主页状态: {retry_home_response.status}")
                  await asyncio.sleep(1)

                  async with session.get(
                      appinfo_url,
                      headers=headers,
                      timeout=default_timeout
                  ) as retry_response:
                      retry_result = await retry_response.json()

                  if retry_result.get('errno') != 0:
                      print(f"[{self.platform_name}] 重试仍然失败")
                      return {
                          "success": False,
                          "error": f"分散认证问题: {error_msg}",
                          "need_login": True
                      }

                  # The retry succeeded: keep going with its payload instead
                  # of falling through to a failure return.
                  print(f"[{self.platform_name}] 分散认证问题已解决")
                  appinfo_result = retry_result

              # Extract the user record; tolerate explicit nulls.
              user_data = (appinfo_result.get('data') or {}).get('user') or {}
              if not user_data:
                  return {
                      "success": False,
                      "error": "无法获取用户信息",
                      "need_login": True
                  }

              # Account status check. Statuses treated as valid here:
              # audit, pass, normal, newbie. Anything else is only logged.
              status = user_data.get('status', '')
              valid_statuses = ['audit', 'pass', 'normal', 'newbie']
              if status not in valid_statuses:
                  print(f"[{self.platform_name}] 账号状态异常: {status}")

              # Basic profile fields, with fallbacks when a field is missing.
              account_name = user_data.get('name') or user_data.get('uname') or '百家号账号'
              app_id = user_data.get('app_id') or user_data.get('id', 0)
              account_id = str(app_id) if app_id else f"baijiahao_{int(datetime.now().timestamp() * 1000)}"

              # Protocol-relative avatar URLs get an explicit https scheme.
              avatar_url = user_data.get('avatar') or user_data.get('avatar_unify', '')
              if avatar_url and avatar_url.startswith('//'):
                  avatar_url = 'https:' + avatar_url

              print(f"[{self.platform_name}] 账号名称: {account_name}, ID: {account_id}")

              # Step 2: fan count (non-critical, failure does not abort).
              fans_count = 0
              try:
                  print(f"[{self.platform_name}] [2/3] 调用 growth/get_info API 获取粉丝数...")
                  async with session.get(
                      'https://baijiahao.baidu.com/cms-ui/rights/growth/get_info',
                      headers=headers,
                      timeout=aiohttp.ClientTimeout(total=10)
                  ) as response:
                      growth_result = await response.json()

                  if growth_result.get('errno') == 0:
                      growth_data = growth_result.get('data') or {}
                      fans_count = int(growth_data.get('fans_num') or 0)
                      print(f"[{self.platform_name}] 粉丝数: {fans_count}")
                  else:
                      print(f"[{self.platform_name}] 获取粉丝数失败: {growth_result.get('errmsg')}")
              except Exception as e:
                  print(f"[{self.platform_name}] 获取粉丝数异常(非关键): {e}")

              # Step 3: work count (same API and parameters as the Node side,
              # per the original author's note).
              works_count = 0
              try:
                  print(f"[{self.platform_name}] [3/3] 调用 article/lists API 获取作品数...")
                  list_url = 'https://baijiahao.baidu.com/pcui/article/lists?currentPage=1&pageSize=20&search=&type=&collection=&startDate=&endDate=&clearBeforeFetch=false&dynamic=0'

                  async with session.get(
                      list_url,
                      headers={
                          'accept': '*/*',
                          'user-agent': 'PostmanRuntime/7.51.0',
                          # The cookie header is managed by the session.
                          'referer': 'https://baijiahao.baidu.com/builder/rc/content',
                          'connection': 'keep-alive',
                          'accept-encoding': 'gzip, deflate, br',
                      },
                      timeout=default_timeout
                  ) as response:
                      response_text = await response.text()

                  print(f"[{self.platform_name}] ========== Works API Response ==========")
                  # Only the first 1000 characters are printed.
                  print(f"[{self.platform_name}] Full response: {response_text[:1000]}...")
                  print(f"[{self.platform_name}] =========================================")
                  works_result = json.loads(response_text)

                  # Distributed-auth problem (errno=10001402): retry once with
                  # a fuller, navigation-like header set.
                  if works_result.get('errno') == 10001402:
                      print(f"[{self.platform_name}] 分散认证问题 (errno=10001402),3秒后重试...")
                      await asyncio.sleep(3)

                      retry_headers = headers.copy()
                      retry_headers.update({
                          'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
                          'Cache-Control': 'max-age=0',
                          'Upgrade-Insecure-Requests': '1',
                      })
                      async with session.get(
                          list_url,
                          headers=retry_headers,
                          timeout=default_timeout
                      ) as retry_response:
                          retry_text = await retry_response.text()

                      print(f"[{self.platform_name}] ========== Works API Retry Response ==========")
                      print(f"[{self.platform_name}] Full retry response: {retry_text[:1000]}...")
                      print(f"[{self.platform_name}] ===============================================")
                      works_result = json.loads(retry_text)

                      if works_result.get('errno') == 10001402:
                          print(f"[{self.platform_name}] 重试仍然失败,返回已获取的账号信息")
                          works_result = None

                  if works_result and works_result.get('errno') == 0:
                      works_data = works_result.get('data') or {}
                      # Prefer data.page.totalCount; fall back to data.total
                      # (older response format).
                      page_info = works_data.get('page') or {}
                      total_count = page_info.get('totalCount')
                      if total_count is None:
                          total_count = works_data.get('total')
                      works_count = int(total_count or 0)
                      print(f"[{self.platform_name}] 作品数: {works_count} (from page.totalCount: {page_info.get('totalCount')}, from total: {works_data.get('total')})")
                  else:
                      errno = works_result.get('errno') if works_result else 'unknown'
                      errmsg = works_result.get('errmsg', 'unknown error') if works_result else 'no response'
                      print(f"[{self.platform_name}] 获取作品数失败: errno={errno}, errmsg={errmsg}")
              except Exception as e:
                  import traceback
                  print(f"[{self.platform_name}] 获取作品数异常(非关键): {e}")
                  traceback.print_exc()

              # Assemble the result.
              account_info = {
                  "success": True,
                  "account_id": account_id,
                  "account_name": account_name,
                  "avatar_url": avatar_url,
                  "fans_count": fans_count,
                  "works_count": works_count,
              }
              print(f"[{self.platform_name}] ✓ 获取成功: {account_name} (粉丝: {fans_count}, 作品: {works_count})")
              return account_info

      except Exception as e:
          import traceback
          traceback.print_exc()
          return {
              "success": False,
              "error": str(e)
          }
  async def check_captcha(self) -> dict:
      """
      Check whether the current page shows a captcha or a login prompt.

      Each selector is probed with ``self.page.locator(selector).count()``.
      A selector whose probe raises an ordinary ``Exception`` is skipped;
      cancellation and interpreter-exit signals are not swallowed.

      Returns:
          ``{'need_captcha': bool, 'captcha_type': str}`` where
          ``captcha_type`` is ``'image'`` when a captcha selector matched,
          ``'login'`` when a login-prompt selector matched, and ``''`` when
          nothing matched or no page is available.
      """
      if not self.page:
          return {'need_captcha': False, 'captcha_type': ''}

      # (captcha_type, log label, selectors) — captcha probes run before the
      # login-dialog probes, so a captcha match wins when both are present.
      probes = (
          ('image', '检测到验证码', [
              'text="请输入验证码"',
              'text="滑动验证"',
              '[class*="captcha"]',
              '[class*="verify"]',
          ]),
          ('login', '检测到需要登录', [
              'text="请登录"',
              'text="登录后继续"',
              '[class*="login-dialog"]',
          ]),
      )

      try:
          for captcha_type, label, selectors in probes:
              for selector in selectors:
                  try:
                      matched = await self.page.locator(selector).count() > 0
                  except Exception:
                      # Best effort: a selector that cannot be evaluated is
                      # simply treated as "not present".
                      continue
                  if matched:
                      print(f"[{self.platform_name}] {label}: {selector}")
                      return {'need_captcha': True, 'captcha_type': captcha_type}
      except Exception as e:
          print(f"[{self.platform_name}] 验证码检测异常: {e}")

      return {'need_captcha': False, 'captcha_type': ''}
  async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
      """
      Publish a video to Baijiahao by driving the creator-center page.

      Flow: initialise the browser and cookies, open the video publish page,
      stop early if a login redirect or captcha is detected, upload the file
      (file input first, file chooser as fallback), wait up to 300 s for the
      upload, fill the title (first 30 characters) and optional description
      (first 200 characters), click the publish button, then poll for up to
      180 s for a success redirect, success dialog, error message or captcha.

      Args:
          cookies: Cookie payload accepted by ``self.parse_cookies``.
          params: Publish parameters; ``video_path``, ``title`` and
              ``description`` are read here.

      Returns:
          A ``PublishResult`` whose ``status`` is ``'success'``,
          ``'failed'``, ``'need_captcha'`` or ``'need_action'`` (timed out
          without a clear outcome).

      Raises:
          Exception: If the page was not initialised, the video file does
              not exist, or the page shows a visible upload-failure message
              while waiting for the upload.
      """
      import os
      import re

      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 开始发布视频")
      print(f"[{self.platform_name}] 视频路径: {params.video_path}")
      print(f"[{self.platform_name}] 标题: {params.title}")
      print(f"[{self.platform_name}] Headless: {self.headless}")
      print(f"{'='*60}")

      self.report_progress(5, "正在初始化浏览器...")

      # Start the browser.
      await self.init_browser()
      print(f"[{self.platform_name}] 浏览器初始化完成")

      # Parse the cookies and install them.
      cookie_list = self.parse_cookies(cookies)
      print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
      await self.set_cookies(cookie_list)

      if not self.page:
          raise Exception("Page not initialized")

      # Make sure the video file exists before touching the page.
      if not os.path.exists(params.video_path):
          raise Exception(f"视频文件不存在: {params.video_path}")
      print(f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")

      self.report_progress(10, "正在打开上传页面...")

      # Open the video publish page (the newer "videoV2" editor).
      video_publish_url = "https://baijiahao.baidu.com/builder/rc/edit?type=videoV2&is_from_cms=1"
      await self.page.goto(video_publish_url, wait_until="domcontentloaded", timeout=60000)
      await asyncio.sleep(3)

      # A redirect to a login URL means the cookies are no longer valid.
      current_url = self.page.url
      print(f"[{self.platform_name}] 当前页面: {current_url}")
      for indicator in self.login_indicators:
          if indicator in current_url:
              screenshot_base64 = await self.capture_screenshot()
              return PublishResult(
                  success=False,
                  platform=self.platform_name,
                  error="Cookie 已过期,需要重新登录",
                  need_captcha=True,
                  captcha_type='login',
                  screenshot_base64=screenshot_base64,
                  page_url=current_url,
                  status='need_captcha'
              )

      # Captcha detection via the AI helper.
      ai_captcha = await self.ai_check_captcha()
      if ai_captcha['has_captcha']:
          print(f"[{self.platform_name}] AI检测到验证码: {ai_captcha['captcha_type']}", flush=True)
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error=f"检测到{ai_captcha['captcha_type']}验证码,需要使用有头浏览器完成验证",
              need_captcha=True,
              captcha_type=ai_captcha['captcha_type'],
              screenshot_base64=screenshot_base64,
              page_url=current_url,
              status='need_captcha'
          )

      # Captcha detection via selectors.
      captcha_result = await self.check_captcha()
      if captcha_result['need_captcha']:
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error=f"需要{captcha_result['captcha_type']}验证码,请使用有头浏览器完成验证",
              need_captcha=True,
              captcha_type=captcha_result['captcha_type'],
              screenshot_base64=screenshot_base64,
              page_url=current_url,
              status='need_captcha'
          )

      self.report_progress(15, "正在选择视频文件...")

      # Give the page time to finish loading.
      await asyncio.sleep(2)

      # Dismiss pop-ups that may cover the editor (best effort).
      close_buttons = [
          'button:has-text("我知道了")',
          'button:has-text("知道了")',
          '[class*="close"]',
          '[class*="modal-close"]',
      ]
      for btn_selector in close_buttons:
          try:
              btn = self.page.locator(btn_selector).first
              if await btn.count() > 0 and await btn.is_visible():
                  await btn.click()
                  await asyncio.sleep(0.5)
          except Exception:
              # A pop-up that cannot be closed is not fatal.
              pass

      # Upload the video, trying several approaches.
      upload_success = False

      # Approach 1: set the file directly on an <input type="file">.
      try:
          file_inputs = await self.page.query_selector_all('input[type="file"]')
          print(f"[{self.platform_name}] 找到 {len(file_inputs)} 个文件输入")
          for file_input in file_inputs:
              try:
                  await file_input.set_input_files(params.video_path)
                  upload_success = True
                  print(f"[{self.platform_name}] 通过 file input 上传成功")
                  break
              except Exception as e:
                  print(f"[{self.platform_name}] file input 上传失败: {e}")
      except Exception as e:
          print(f"[{self.platform_name}] 查找 file input 失败: {e}")

      # Approach 2: click an upload area and answer the file chooser.
      if not upload_success:
          upload_selectors = [
              'div[class*="upload-box"]',
              'div[class*="drag-upload"]',
              'div[class*="uploader"]',
              'div:has-text("点击上传")',
              'div:has-text("选择文件")',
              '[class*="upload-area"]',
          ]
          for selector in upload_selectors:
              try:
                  upload_area = self.page.locator(selector).first
                  if await upload_area.count() > 0:
                      print(f"[{self.platform_name}] 尝试点击上传区域: {selector}")
                      async with self.page.expect_file_chooser(timeout=10000) as fc_info:
                          await upload_area.click()
                      file_chooser = await fc_info.value
                      await file_chooser.set_files(params.video_path)
                      upload_success = True
                      print(f"[{self.platform_name}] 通过点击上传区域成功")
                      break
              except Exception as e:
                  print(f"[{self.platform_name}] 选择器 {selector} 失败: {e}")

      if not upload_success:
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error="未找到上传入口",
              screenshot_base64=screenshot_base64,
              page_url=await self.get_page_url(),
              status='failed'
          )

      self.report_progress(20, "等待视频上传...")

      # Wait for the upload to finish (at most 5 minutes).
      loop = asyncio.get_event_loop()
      upload_timeout = 300
      start_time = loop.time()
      while loop.time() - start_time < upload_timeout:
          # Progress indicator: map 0-100 % onto the 20-60 progress range.
          try:
              progress_el = self.page.locator('[class*="progress"], [class*="percent"]').first
              if await progress_el.count() > 0:
                  progress_text = await progress_el.text_content()
                  match = re.search(r'(\d+)%', progress_text) if progress_text else None
                  if match:
                      pct = int(match.group(1))
                      self.report_progress(20 + int(pct * 0.4), f"视频上传中 {pct}%...")
                      if pct >= 100:
                          print(f"[{self.platform_name}] 上传完成")
                          break
          except Exception:
              pass

          # A visible title input means the upload has completed.
          try:
              title_input = self.page.locator('input[placeholder*="标题"], textarea[placeholder*="标题"], [class*="title-input"] input').first
              if await title_input.count() > 0 and await title_input.is_visible():
                  print(f"[{self.platform_name}] 检测到标题输入框,上传完成")
                  break
          except Exception:
              pass

          # Look for an upload error message. Only the probing is best
          # effort; a detected failure is raised outside the try so it is
          # not swallowed by the handler.
          upload_error = None
          try:
              error_el = self.page.locator('[class*="error"], [class*="fail"]').first
              if await error_el.count() > 0 and await error_el.is_visible():
                  error_text = await error_el.text_content()
                  if error_text and ('失败' in error_text or '错误' in error_text):
                      upload_error = error_text
          except Exception:
              pass
          if upload_error:
              raise Exception(f"上传失败: {upload_error}")

          await asyncio.sleep(3)

      self.report_progress(60, "正在填写标题...")
      await asyncio.sleep(2)

      # Fill in the title using the first selector that works.
      title_filled = False
      title_selectors = [
          'input[placeholder*="标题"]',
          'textarea[placeholder*="标题"]',
          '[class*="title-input"] input',
          '[class*="title"] input',
          'input[maxlength]',
      ]
      for selector in title_selectors:
          try:
              title_input = self.page.locator(selector).first
              if await title_input.count() > 0 and await title_input.is_visible():
                  await title_input.click()
                  # Select the existing text so typing replaces it.
                  await self.page.keyboard.press("Control+KeyA")
                  # The title is truncated to 30 characters (Baijiahao's
                  # title limit per the original author's note).
                  await self.page.keyboard.type(params.title[:30])
                  title_filled = True
                  print(f"[{self.platform_name}] 标题填写成功")
                  break
          except Exception as e:
              print(f"[{self.platform_name}] 标题选择器 {selector} 失败: {e}")

      if not title_filled:
          print(f"[{self.platform_name}] 警告: 未能填写标题")

      # Fill in the description when one was supplied (best effort).
      if params.description:
          self.report_progress(65, "正在填写描述...")
          desc_selectors = [
              'textarea[placeholder*="描述"]',
              'textarea[placeholder*="简介"]',
              '[class*="desc"] textarea',
              '[class*="description"] textarea',
          ]
          for selector in desc_selectors:
              try:
                  desc_input = self.page.locator(selector).first
                  if await desc_input.count() > 0 and await desc_input.is_visible():
                      await desc_input.click()
                      await self.page.keyboard.type(params.description[:200])
                      print(f"[{self.platform_name}] 描述填写成功")
                      break
              except Exception:
                  pass

      self.report_progress(70, "正在发布...")
      await asyncio.sleep(2)

      # Click the publish button.
      publish_selectors = [
          'button:has-text("发布")',
          'button:has-text("发表")',
          'button:has-text("提交")',
          '[class*="publish"] button',
          '[class*="submit"] button',
      ]
      publish_clicked = False
      for selector in publish_selectors:
          try:
              btn = self.page.locator(selector).first
              if await btn.count() > 0 and await btn.is_visible():
                  # A boolean `disabled` attribute reads back as "" when
                  # present, so presence (not truthiness) is what matters.
                  is_disabled = await btn.get_attribute('disabled') is not None
                  if is_disabled:
                      print(f"[{self.platform_name}] 按钮 {selector} 被禁用")
                      continue
                  await btn.click()
                  publish_clicked = True
                  print(f"[{self.platform_name}] 点击发布按钮成功")
                  break
          except Exception as e:
              print(f"[{self.platform_name}] 发布按钮 {selector} 失败: {e}")

      if not publish_clicked:
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error="未找到发布按钮",
              screenshot_base64=screenshot_base64,
              page_url=await self.get_page_url(),
              status='failed'
          )

      self.report_progress(80, "等待发布完成...")

      # Remember the URL at click time so later changes can be logged.
      publish_page_url = self.page.url
      print(f"[{self.platform_name}] 发布前 URL: {publish_page_url}")

      # Wait for the publish to complete (at most 3 minutes).
      publish_timeout = 180
      start_time = loop.time()
      last_url = publish_page_url

      while loop.time() - start_time < publish_timeout:
          await asyncio.sleep(3)
          current_url = self.page.url

          # Log URL changes.
          if current_url != last_url:
              print(f"[{self.platform_name}] URL 变化: {last_url} -> {current_url}")
              last_url = current_url

          # Landing on the content-management page (/builder/rc/content,
          # not the editor) is treated as the definitive success signal.
          if '/builder/rc/content' in current_url and 'edit' not in current_url:
              self.report_progress(100, "发布成功!")
              print(f"[{self.platform_name}] 发布成功,已跳转到内容管理页: {current_url}")
              screenshot_base64 = await self.capture_screenshot()
              return PublishResult(
                  success=True,
                  platform=self.platform_name,
                  message="发布成功",
                  screenshot_base64=screenshot_base64,
                  page_url=current_url,
                  status='success'
              )

          # An explicit success dialog also counts as success.
          try:
              success_modal = self.page.locator('div:has-text("发布成功"), div:has-text("提交成功"), div:has-text("视频发布成功")').first
              if await success_modal.count() > 0 and await success_modal.is_visible():
                  self.report_progress(100, "发布成功!")
                  print(f"[{self.platform_name}] 检测到发布成功弹窗")
                  screenshot_base64 = await self.capture_screenshot()
                  # Wait briefly in case the page redirects afterwards.
                  await asyncio.sleep(3)
                  return PublishResult(
                      success=True,
                      platform=self.platform_name,
                      message="发布成功",
                      screenshot_base64=screenshot_base64,
                      page_url=self.page.url,
                      status='success'
                  )
          except Exception as e:
              print(f"[{self.platform_name}] 检测成功提示异常: {e}")

          # A visible, non-empty error message means the publish failed.
          try:
              error_selectors = [
                  'div.error-tip',
                  'div[class*="error-msg"]',
                  'span[class*="error"]',
                  'div:has-text("发布失败")',
                  'div:has-text("提交失败")',
              ]
              for error_selector in error_selectors:
                  error_el = self.page.locator(error_selector).first
                  if await error_el.count() > 0 and await error_el.is_visible():
                      error_text = await error_el.text_content()
                      if error_text and error_text.strip():
                          print(f"[{self.platform_name}] 检测到错误: {error_text}")
                          screenshot_base64 = await self.capture_screenshot()
                          return PublishResult(
                              success=False,
                              platform=self.platform_name,
                              error=f"发布失败: {error_text.strip()}",
                              screenshot_base64=screenshot_base64,
                              page_url=current_url,
                              status='failed'
                          )
          except Exception as e:
              print(f"[{self.platform_name}] 检测错误提示异常: {e}")

          # A captcha may appear after clicking publish.
          captcha_result = await self.check_captcha()
          if captcha_result['need_captcha']:
              screenshot_base64 = await self.capture_screenshot()
              return PublishResult(
                  success=False,
                  platform=self.platform_name,
                  error=f"发布过程中需要{captcha_result['captcha_type']}验证码",
                  need_captcha=True,
                  captcha_type=captcha_result['captcha_type'],
                  screenshot_base64=screenshot_base64,
                  page_url=current_url,
                  status='need_captcha'
              )

          # Still on the editor page: decide whether to click publish again.
          if 'edit' in current_url:
              try:
                  # Is the page still uploading or processing?
                  processing_indicators = [
                      '[class*="loading"]',
                      '[class*="uploading"]',
                      '[class*="processing"]',
                      'div:has-text("正在上传")',
                      'div:has-text("正在处理")',
                  ]
                  is_processing = False
                  for indicator in processing_indicators:
                      if await self.page.locator(indicator).count() > 0:
                          is_processing = True
                          print(f"[{self.platform_name}] 正在处理中...")
                          break

                  # After 30 s on the editor page with nothing processing,
                  # the first click probably did not take effect.
                  # NOTE(review): this re-click can repeat on every later
                  # poll iteration — confirm that is intended.
                  if not is_processing and loop.time() - start_time > 30:
                      print(f"[{self.platform_name}] 发布似乎未生效,尝试重新点击发布按钮...")
                      for selector in publish_selectors:
                          try:
                              btn = self.page.locator(selector).first
                              if await btn.count() > 0 and await btn.is_visible():
                                  # Attribute presence means disabled.
                                  is_disabled = await btn.get_attribute('disabled') is not None
                                  if not is_disabled:
                                      await btn.click()
                                      print(f"[{self.platform_name}] 重新点击发布按钮")
                                      break
                          except Exception:
                              pass
              except Exception as e:
                  print(f"[{self.platform_name}] 检查处理状态异常: {e}")

      # Timed out: capture a screenshot for diagnosing the final state.
      print(f"[{self.platform_name}] 发布超时,最终 URL: {self.page.url}")
      screenshot_base64 = await self.capture_screenshot()

      # Last check: did the page end up on the content-management page?
      final_url = self.page.url
      if '/builder/rc/content' in final_url and 'edit' not in final_url:
          return PublishResult(
              success=True,
              platform=self.platform_name,
              message="发布成功(延迟确认)",
              screenshot_base64=screenshot_base64,
              page_url=final_url,
              status='success'
          )

      return PublishResult(
          success=False,
          platform=self.platform_name,
          error="发布超时,请手动检查发布状态",
          screenshot_base64=screenshot_base64,
          page_url=final_url,
          status='need_action'
      )
  6362. async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
  6363. """
  6364. 获取百家号作品列表
  6365. 优先使用内容管理页的接口(pcui/article/lists)。
  6366. 说明:
  6367. - 该接口通常需要自定义请求头 token(JWT),仅靠 Cookie 可能会返回“未登录”
  6368. - 这里使用 Playwright 打开内容页,从 localStorage/sessionStorage/页面脚本中自动提取 token,
  6369. 再在页面上下文中发起 fetch(携带 cookie + token),以提高成功率
  6370. """
  6371. import re
  6372. print(f"\n{'='*60}")
  6373. print(f"[{self.platform_name}] 获取作品列表 (使用 API)")
  6374. print(f"[{self.platform_name}] page={page}, page_size={page_size}")
  6375. print(f"{'='*60}")
  6376. works: List[WorkItem] = []
  6377. total = 0
  6378. has_more = False
  6379. next_page = ""
  6380. try:
  6381. # 解析并设置 cookies(Playwright)
  6382. cookie_list = self.parse_cookies(cookies)
  6383. await self.init_browser()
  6384. await self.set_cookies(cookie_list)
  6385. if not self.page:
  6386. raise Exception("Page not initialized")
  6387. # 先打开内容管理页,确保本页 Referer/会话就绪
  6388. # Node 侧传 page=0,1,...;接口 currentPage 为 1,2,...
  6389. current_page = int(page) + 1
  6390. page_size = int(page_size)
  6391. content_url = (
  6392. "https://baijiahao.baidu.com/builder/rc/content"
  6393. f"?currentPage={current_page}&pageSize={page_size}"
  6394. "&search=&type=&collection=&startDate=&endDate="
  6395. )
  6396. await self.page.goto(content_url, wait_until="domcontentloaded", timeout=60000)
  6397. await asyncio.sleep(2)
  6398. # 1) 提取 token(JWT)
  6399. token = await self.page.evaluate(
  6400. """
  6401. () => {
  6402. const isJwtLike = (v) => {
  6403. if (!v || typeof v !== 'string') return false;
  6404. const s = v.trim();
  6405. if (s.length < 60) return false;
  6406. const parts = s.split('.');
  6407. if (parts.length !== 3) return false;
  6408. return parts.every(p => /^[A-Za-z0-9_-]+$/.test(p) && p.length > 10);
  6409. };
  6410. const pickFromStorage = (storage) => {
  6411. try {
  6412. const keys = Object.keys(storage || {});
  6413. for (const k of keys) {
  6414. const v = storage.getItem(k);
  6415. if (isJwtLike(v)) return v;
  6416. }
  6417. } catch {}
  6418. return "";
  6419. };
  6420. // localStorage / sessionStorage
  6421. let t = pickFromStorage(window.localStorage);
  6422. if (t) return t;
  6423. t = pickFromStorage(window.sessionStorage);
  6424. if (t) return t;
  6425. // meta 标签
  6426. const meta = document.querySelector('meta[name="token"], meta[name="bjh-token"]');
  6427. const metaToken = meta && meta.getAttribute('content');
  6428. if (isJwtLike(metaToken)) return metaToken;
  6429. // 简单从全局变量里找
  6430. const candidates = [
  6431. (window.__INITIAL_STATE__ && window.__INITIAL_STATE__.token) || "",
  6432. (window.__PRELOADED_STATE__ && window.__PRELOADED_STATE__.token) || "",
  6433. (window.__NUXT__ && window.__NUXT__.state && window.__NUXT__.state.token) || "",
  6434. ];
  6435. for (const c of candidates) {
  6436. if (isJwtLike(c)) return c;
  6437. }
  6438. return "";
  6439. }
  6440. """
  6441. )
  6442. # 2) 若仍未取到 token,再从页面 HTML 兜底提取
  6443. if not token:
  6444. html = await self.page.content()
  6445. m = re.search(r'([A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,})', html)
  6446. if m:
  6447. token = m.group(1)
  6448. if not token:
  6449. raise Exception("未能从页面提取 token(可能未登录或触发风控),请重新登录百家号账号后再试")
  6450. # 3) 调用接口(在页面上下文 fetch,自动携带 cookie)
  6451. api_url = (
  6452. "https://baijiahao.baidu.com/pcui/article/lists"
  6453. f"?currentPage={current_page}"
  6454. f"&pageSize={page_size}"
  6455. "&search=&type=&collection=&startDate=&endDate="
  6456. "&clearBeforeFetch=false"
  6457. "&dynamic=1"
  6458. )
  6459. resp = await self.page.evaluate(
  6460. """
  6461. async ({ url, token }) => {
  6462. const r = await fetch(url, {
  6463. method: 'GET',
  6464. credentials: 'include',
  6465. headers: {
  6466. 'accept': 'application/json, text/plain, */*',
  6467. ...(token ? { token } : {}),
  6468. },
  6469. });
  6470. const text = await r.text();
  6471. return { ok: r.ok, status: r.status, text };
  6472. }
  6473. """,
  6474. {"url": api_url, "token": token},
  6475. )
  6476. if not resp or not resp.get("ok"):
  6477. status = resp.get("status") if isinstance(resp, dict) else "unknown"
  6478. raise Exception(f"百家号接口请求失败: HTTP {status}")
  6479. api_result = json.loads(resp.get("text") or "{}")
  6480. print(f"[{self.platform_name}] pcui/article/lists 响应: errno={api_result.get('errno')}, errmsg={api_result.get('errmsg')}")
  6481. if api_result.get("errno") != 0:
  6482. errno = api_result.get("errno")
  6483. errmsg = api_result.get("errmsg", "unknown error")
  6484. # 20040001 常见为“未登录”
  6485. if errno in (110, 20040001):
  6486. raise Exception("百家号未登录或 Cookie/token 失效,请重新登录后再同步")
  6487. raise Exception(f"百家号接口错误: errno={errno}, errmsg={errmsg}")
  6488. data = api_result.get("data", {}) or {}
  6489. items = data.get("list", []) or []
  6490. page_info = data.get("page", {}) or {}
  6491. total = int(page_info.get("totalCount", 0) or 0)
  6492. total_page = int(page_info.get("totalPage", 0) or 0)
  6493. cur_page = int(page_info.get("currentPage", current_page) or current_page)
  6494. has_more = bool(total_page and cur_page < total_page)
  6495. next_page = cur_page + 1 if has_more else ""
  6496. print(f"[{self.platform_name}] 获取到 {len(items)} 个作品,总数: {total}, currentPage={cur_page}, totalPage={total_page}")
  6497. def _pick_cover(item: dict) -> str:
  6498. cover = item.get("crosswise_cover") or item.get("vertical_cover") or ""
  6499. if cover:
  6500. return cover
  6501. raw = item.get("cover_images") or ""
  6502. try:
  6503. # cover_images 可能是 JSON 字符串
  6504. parsed = json.loads(raw) if isinstance(raw, str) else raw
  6505. if isinstance(parsed, list) and parsed:
  6506. first = parsed[0]
  6507. if isinstance(first, dict):
  6508. return first.get("src") or first.get("ori_src") or ""
  6509. if isinstance(first, str):
  6510. return first
  6511. except Exception:
  6512. pass
  6513. return ""
  6514. def _pick_duration(item: dict) -> int:
  6515. for k in ("rmb_duration", "duration", "long"):
  6516. try:
  6517. v = int(item.get(k) or 0)
  6518. if v > 0:
  6519. return v
  6520. except Exception:
  6521. pass
  6522. # displaytype_exinfo 里可能有 ugcvideo.video_info.durationInSecond
  6523. ex = item.get("displaytype_exinfo") or ""
  6524. try:
  6525. exj = json.loads(ex) if isinstance(ex, str) and ex else (ex if isinstance(ex, dict) else {})
  6526. ugc = (exj.get("ugcvideo") or {}) if isinstance(exj, dict) else {}
  6527. vi = ugc.get("video_info") or {}
  6528. v = int(vi.get("durationInSecond") or ugc.get("long") or 0)
  6529. return v if v > 0 else 0
  6530. except Exception:
  6531. return 0
  6532. def _pick_status(item: dict) -> str:
  6533. qs = str(item.get("quality_status") or "").lower()
  6534. st = str(item.get("status") or "").lower()
  6535. if qs == "rejected" or "reject" in st:
  6536. return "rejected"
  6537. if st in ("draft", "unpublish", "unpublished"):
  6538. return "draft"
  6539. # 百家号常见 publish
  6540. return "published"
  6541. for item in items:
  6542. # 优先使用 nid(builder 预览链接使用这个)
  6543. work_id = str(item.get("nid") or item.get("feed_id") or item.get("article_id") or item.get("id") or "")
  6544. if not work_id:
  6545. continue
  6546. works.append(
  6547. WorkItem(
  6548. work_id=work_id,
  6549. title=str(item.get("title") or ""),
  6550. cover_url=_pick_cover(item),
  6551. video_url=str(item.get("url") or ""),
  6552. duration=_pick_duration(item),
  6553. status=_pick_status(item),
  6554. publish_time=str(item.get("publish_time") or item.get("publish_at") or item.get("created_at") or ""),
  6555. play_count=int(item.get("read_amount") or 0),
  6556. like_count=int(item.get("like_amount") or 0),
  6557. comment_count=int(item.get("comment_amount") or 0),
  6558. share_count=int(item.get("share_amount") or 0),
  6559. collect_count=int(item.get("collection_amount") or 0),
  6560. )
  6561. )
  6562. print(f"[{self.platform_name}] ✓ 成功解析 {len(works)} 个作品")
  6563. except Exception as e:
  6564. import traceback
  6565. traceback.print_exc()
  6566. return WorksResult(
  6567. success=False,
  6568. platform=self.platform_name,
  6569. error=str(e),
  6570. debug_info="baijiahao_get_works_failed"
  6571. )
  6572. return WorksResult(
  6573. success=True,
  6574. platform=self.platform_name,
  6575. works=works,
  6576. total=total,
  6577. has_more=has_more,
  6578. next_page=next_page
  6579. )
  async def check_login_status(self, cookies: str) -> dict:
      """Check whether the given Baijiahao cookies still belong to a logged-in account.

      Uses plain HTTP requests through aiohttp rather than a browser: first
      the builder home page is requested, then the ``builder/app/appinfo``
      API is queried and its JSON payload interpreted.

      Args:
          cookies: Raw cookie data, parsed by ``self.parse_cookies``.

      Returns:
          A dict with ``success``, ``valid`` and ``need_login`` plus
          ``message`` (when the API answered) or ``error`` (when the check
          itself raised).
      """
      import aiohttp

      print(f"[{self.platform_name}] 检查登录状态 (使用 API)")

      def _verdict(valid: bool, message: str) -> dict:
          # Every answer derived from an API response has the same shape.
          return {
              "success": True,
              "valid": valid,
              "need_login": not valid,
              "message": message
          }

      try:
          cookie_list = self.parse_cookies(cookies)
          cookie_dict = {c['name']: c['value'] for c in cookie_list}

          user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
          client_hint = '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"'

          # Headers for the initial page navigation (cookies come from the session).
          session_headers = {
              'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
              'User-Agent': user_agent,
              'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
              'Accept-Encoding': 'gzip, deflate, br',
              'Connection': 'keep-alive',
              'Upgrade-Insecure-Requests': '1',
              'Sec-Fetch-Dest': 'document',
              'Sec-Fetch-Mode': 'navigate',
              'Sec-Fetch-Site': 'none',
              'Sec-Fetch-User': '?1',
              'sec-ch-ua': client_hint,
              'sec-ch-ua-mobile': '?0',
              'sec-ch-ua-platform': '"Windows"'
          }
          # Headers for the same-origin API call issued "from" the home page.
          headers = {
              'Accept': 'application/json, text/plain, */*',
              'User-Agent': user_agent,
              'Referer': 'https://baijiahao.baidu.com/builder/rc/home',
              'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
              'Accept-Encoding': 'gzip, deflate, br',
              'Connection': 'keep-alive',
              'Sec-Fetch-Dest': 'empty',
              'Sec-Fetch-Mode': 'cors',
              'Sec-Fetch-Site': 'same-origin',
              'sec-ch-ua': client_hint,
              'sec-ch-ua-mobile': '?0',
              'sec-ch-ua-platform': '"Windows"'
          }
          request_timeout = aiohttp.ClientTimeout(total=30)

          async with aiohttp.ClientSession(cookies=cookie_dict) as session:
              # Step 0: visit the home page first so the session context exists
              # before the API is called.
              print(f"[{self.platform_name}] [0/2] 访问主页建立会话上下文...")
              async with session.get(
                  'https://baijiahao.baidu.com/builder/rc/home',
                  headers=session_headers,
                  timeout=request_timeout
              ) as home_response:
                  print(f"[{self.platform_name}] 主页访问状态: {home_response.status}")

              # Short pause before hitting the API.
              await asyncio.sleep(1)

              # Step 1: query the appinfo API.
              print(f"[{self.platform_name}] [1/2] 调用 appinfo API 检查登录状态...")
              async with session.get(
                  'https://baijiahao.baidu.com/builder/app/appinfo',
                  headers=headers,
                  timeout=request_timeout
              ) as response:
                  # content_type=None: accept a JSON body even when the
                  # server labels it with a non-JSON Content-Type.
                  api_result = await response.json(content_type=None)

          errno = api_result.get('errno')
          print(f"[{self.platform_name}] API 完整响应: {json.dumps(api_result, ensure_ascii=False)[:500]}")
          print(f"[{self.platform_name}] API 响应: errno={errno}")

          # Non-zero errno means the request was rejected (110 is commonly "not logged in").
          if errno != 0:
              error_msg = api_result.get('errmsg', '未知错误')
              print(f"[{self.platform_name}] Cookie 无效: errno={errno}, msg={error_msg}")
              return _verdict(False, error_msg)

          # "data" or "user" may be present but null; treat that as "no user".
          user_data = (api_result.get('data') or {}).get('user') or {}
          if not user_data:
              print(f"[{self.platform_name}] 无用户数据,Cookie 可能无效")
              return _verdict(False, "无用户数据")

          status = user_data.get('status', '')
          account_name = user_data.get('name') or user_data.get('uname', '')
          # Account states accepted as logged in: audit, pass, normal, newbie.
          valid_statuses = ['audit', 'pass', 'normal', 'newbie']
          if status in valid_statuses and account_name:
              print(f"[{self.platform_name}] ✓ 登录状态有效: {account_name} (status={status})")
              return _verdict(True, "登录状态有效")

          print(f"[{self.platform_name}] 账号状态异常: status={status}, name={account_name}")
          return _verdict(False, f"账号状态异常: {status}")
      except Exception as e:
          import traceback
          traceback.print_exc()
          return {
              "success": False,
              "valid": False,
              "need_login": True,
              "error": str(e)
          }
  async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
      """Return a failed CommentsResult: comment retrieval is not implemented for Baijiahao."""
      # TODO: implement comment retrieval
      not_implemented = CommentsResult(
          success=False,
          platform=self.platform_name,
          work_id=work_id,
          error="百家号评论功能暂未实现",
      )
      return not_implemented
  6708. ================================================================================
  6709. 文件: server\python\platforms\douyin.py
  6710. ================================================================================
  6711. # -*- coding: utf-8 -*-
  6712. """
  6713. 抖音视频发布器
  6714. 参考: matrix/douyin_uploader/main.py
  6715. """
  6716. import asyncio
  6717. import os
  6718. import json
  6719. import re
  6720. from datetime import datetime
  6721. from typing import List
  6722. from .base import (
  6723. BasePublisher, PublishParams, PublishResult,
  6724. WorkItem, WorksResult, CommentItem, CommentsResult
  6725. )
  6726. class DouyinPublisher(BasePublisher):
  6727. """
  6728. 抖音视频发布器
  6729. 使用 Playwright 自动化操作抖音创作者中心
  6730. """
  6731. platform_name = "douyin"
  6732. login_url = "https://creator.douyin.com/"
  6733. publish_url = "https://creator.douyin.com/creator-micro/content/upload"
  6734. cookie_domain = ".douyin.com"
  async def set_schedule_time(self, publish_date: datetime):
      """Switch the publish form to scheduled mode and enter *publish_date*.

      Does nothing when no page is open.
      """
      page = self.page
      if not page:
          return

      # Pick the "scheduled publish" radio option.
      await page.locator("label.radio-d4zkru:has-text('定时发布')").click()
      await asyncio.sleep(1)

      # Replace whatever is in the date/time box with the formatted timestamp.
      stamp = publish_date.strftime("%Y-%m-%d %H:%M")
      await page.locator('.semi-input[placeholder="日期和时间"]').click()
      keyboard = page.keyboard
      await keyboard.press("Control+KeyA")
      await keyboard.type(stamp)
      await keyboard.press("Enter")
      await asyncio.sleep(1)
  async def handle_upload_error(self, video_path: str):
      """Re-submit *video_path* through the upload input after a failed upload.

      Does nothing when no page is open.
      """
      page = self.page
      if not page:
          return

      print(f"[{self.platform_name}] 视频出错了,重新上传中...")
      retry_input = page.locator('div.progress-div [class^="upload-btn-input"]')
      await retry_input.set_input_files(video_path)
  async def check_captcha(self) -> dict:
      """Probe the current page for captcha or login prompts using selectors.

      Selector groups are tried in a fixed order (phone, slider, image,
      login); the first selector that matches at least one element decides
      the result. A selector whose lookup fails is skipped.

      Returns:
          ``{'need_captcha': bool, 'captcha_type': str}`` where
          ``captcha_type`` is ``'phone'``, ``'slider'``, ``'image'``,
          ``'login'`` or ``''`` when nothing was detected (or no page is open).
      """
      if not self.page:
          return {'need_captcha': False, 'captcha_type': ''}

      # (captcha_type, label used in the log line, selectors), in probe order.
      probes = (
          ('phone', '手机验证码', (
              'text="请输入验证码"',
              'text="输入手机验证码"',
              'text="获取验证码"',
              'text="手机号验证"',
              '[class*="captcha"][class*="phone"]',
              '[class*="verify"][class*="phone"]',
              '[class*="sms-code"]',
              'input[placeholder*="验证码"]',
          )),
          ('slider', '滑块验证码', (
              '[class*="captcha"][class*="slider"]',
              '[class*="slide-verify"]',
              '[class*="drag-verify"]',
              'text="按住滑块"',
              'text="向右滑动"',
              'text="拖动滑块"',
          )),
          ('image', '图片验证码', (
              '[class*="captcha"][class*="image"]',
              '[class*="verify-image"]',
              'text="点击图片"',
              'text="选择正确的"',
          )),
          # A login prompt means the cookies have expired.
          ('login', '需要登录', (
              'text="请先登录"',
              'text="登录后继续"',
              '[class*="login-modal"]',
              '[class*="login-dialog"]',
          )),
      )

      try:
          for captcha_type, label, selectors in probes:
              for selector in selectors:
                  try:
                      if await self.page.locator(selector).count() > 0:
                          print(f"[{self.platform_name}] 检测到{label}: {selector}", flush=True)
                          return {'need_captcha': True, 'captcha_type': captcha_type}
                  except Exception:
                      # Best effort: a failing selector must not stop the scan.
                      # `Exception` (not a bare except) lets task cancellation
                      # and KeyboardInterrupt propagate.
                      pass
      except Exception as e:
          print(f"[{self.platform_name}] 验证码检测异常: {e}", flush=True)

      return {'need_captcha': False, 'captcha_type': ''}
  async def handle_phone_captcha(self) -> bool:
      """Drive the SMS verification dialog: send the code, ask for it, submit it.

      Steps: read the masked phone number from the page text, click the
      "send code" button if one is visible and enabled, combine a text-based
      check with ``self.ai_analyze_sms_send_state()`` to judge whether the
      SMS was sent, request the code via
      ``self.request_sms_code_from_frontend``, type it into the first
      matching input and click a confirm button.

      Returns:
          True when the code was filled in and the submit step ran; False
          when no page is open or any step raised.
      """
      if not self.page:
          return False

      # Countdown / "resend" / "sent" wording that indicates the SMS went out.
      sent_pattern = r"(\d+\s*秒)|(\d+\s*s)|后可重试|重新发送|已发送"

      try:
          try:
              body_text = await self.page.inner_text("body")
          except Exception:
              body_text = ""
          phone_match = re.search(r"(1\d{2}\*{4}\d{4})", body_text or "")
          masked_phone = phone_match.group(1) if phone_match else ""

          async def _get_send_button():
              """Return the first visible "send code" button, or None."""
              candidates = [
                  self.page.get_by_role("button", name="获取验证码"),
                  self.page.get_by_role("button", name="发送验证码"),
                  self.page.locator('button:has-text("获取验证码")'),
                  self.page.locator('button:has-text("发送验证码")'),
                  self.page.locator('[role="button"]:has-text("获取验证码")'),
                  self.page.locator('[role="button"]:has-text("发送验证码")'),
              ]
              for c in candidates:
                  try:
                      if await c.count() > 0 and await c.first.is_visible():
                          return c.first
                  except Exception:
                      continue
              return None

          async def _confirm_sent() -> bool:
              """Heuristically decide from page text / button state whether the SMS was sent."""
              try:
                  txt = await self.page.inner_text("body")
              except Exception:
                  txt = ""
              if re.search(sent_pattern, txt or ""):
                  return True
              try:
                  send_btn = await _get_send_button()
                  if send_btn:
                      # A disabled button or a countdown label both mean "sent".
                      if await send_btn.is_disabled():
                          return True
                      label = await send_btn.inner_text()
                      if re.search(sent_pattern, label or ""):
                          return True
              except Exception:
                  pass
              return False

          async def _ai_state() -> dict:
              """Fetch the AI analysis, normalised to a dict so `.get` is always safe."""
              state = await self.ai_analyze_sms_send_state()
              return state if isinstance(state, dict) else {}

          did_click_send = False
          btn = await _get_send_button()
          if btn:
              try:
                  if await btn.is_enabled():
                      await btn.click(timeout=5000)
                      did_click_send = True
                      print(f"[{self.platform_name}] 已点击发送短信验证码", flush=True)
              except Exception as e:
                  print(f"[{self.platform_name}] 点击发送验证码按钮失败: {e}", flush=True)

          if did_click_send:
              try:
                  await self.page.wait_for_timeout(800)
              except Exception:
                  pass

          sent_confirmed = await _confirm_sent() if did_click_send else False
          ai_state = await _ai_state()
          if ai_state.get("sent_likely"):
              sent_confirmed = True

          # Second attempt, only when the AI analysis recommends clicking "send".
          if (not did_click_send or not sent_confirmed) and ai_state.get("suggested_action") == "click_send":
              btn2 = await _get_send_button()
              if btn2:
                  try:
                      if await btn2.is_enabled():
                          await btn2.click(timeout=5000)
                          did_click_send = True
                          await self.page.wait_for_timeout(800)
                          sent_confirmed = await _confirm_sent()
                          ai_state = await _ai_state()
                          if ai_state.get("sent_likely"):
                              sent_confirmed = True
                  except Exception:
                      pass

          # Build the hint shown to the user alongside the code prompt.
          code_hint = "请输入短信验证码。"
          if ai_state.get("block_reason") == "slider":
              code_hint = "检测到滑块/人机验证阻塞,请先在浏览器窗口完成验证后再发送短信验证码。"
          elif ai_state.get("block_reason") in ["rate_limit", "risk"]:
              code_hint = f"页面提示可能被限制/风控({ai_state.get('notes','') or '请稍后重试'})。可稍等后重新发送验证码。"
          elif not did_click_send:
              code_hint = "未找到或无法点击“发送验证码”按钮,请在弹出的浏览器页面手动点击发送后再输入验证码。"
          elif sent_confirmed:
              code_hint = f"已检测到短信验证码已发送({ai_state.get('notes','') or '请查收短信'})。"
          else:
              code_hint = f"已尝试点击发送验证码,但未确认发送成功({ai_state.get('notes','') or '请查看是否出现倒计时/重新发送'})。"

          code = await self.request_sms_code_from_frontend(masked_phone, message=code_hint)

          # Fill the first input that exists, from most to least specific selector.
          input_selectors = [
              'input[placeholder*="验证码"]',
              'input[placeholder*="短信"]',
              'input[type="tel"]',
              'input[type="text"]',
          ]
          filled = False
          for selector in input_selectors:
              try:
                  el = self.page.locator(selector).first
                  if await el.count() > 0:
                      await el.fill(code)
                      filled = True
                      break
              except Exception:
                  continue
          if not filled:
              raise Exception("未找到验证码输入框")

          submit_selectors = [
              'button:has-text("确定")',
              'button:has-text("确认")',
              'button:has-text("提交")',
              'button:has-text("完成")',
          ]
          for selector in submit_selectors:
              try:
                  submit_btn = self.page.locator(selector).first
                  if await submit_btn.count() > 0:
                      await submit_btn.click()
                      break
              except Exception:
                  continue

          # Best effort: give the dialog a chance to close before continuing.
          try:
              await self.page.wait_for_timeout(1000)
              await self.page.wait_for_selector('text="请输入验证码"', state="hidden", timeout=15000)
          except Exception:
              pass

          print(f"[{self.platform_name}] 短信验证码已提交,继续执行发布流程", flush=True)
          return True
      except Exception as e:
          print(f"[{self.platform_name}] 处理短信验证码失败: {e}", flush=True)
          return False
  async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
      """Publish a video to Douyin through the creator-center upload page.

      Flow: start the browser and load the cookies, open the upload page,
      handle login / captcha prompts, choose the video file, fill in title
      and topics, wait for the upload, apply optional settings (location,
      Toutiao/Xigua sync, schedule) and click "publish", polling for the
      redirect to the content-management page.

      Args:
          cookies: Raw cookie data, parsed by ``self.parse_cookies``.
          params: Publish parameters; this method reads ``video_path``,
              ``title``, ``tags``, ``location`` and ``publish_date``.

      Returns:
          A PublishResult whose ``status`` is ``'success'``,
          ``'need_captcha'``, ``'failed'`` or ``'need_action'`` (publish
          timed out).

      Raises:
          Exception: when the page was not initialised or the video file
              does not exist.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 开始发布视频")
      print(f"[{self.platform_name}] 视频路径: {params.video_path}")
      print(f"[{self.platform_name}] 标题: {params.title}")
      print(f"[{self.platform_name}] Headless: {self.headless}")
      print(f"{'='*60}")

      self.report_progress(5, "正在初始化浏览器...")

      # Start the browser.
      await self.init_browser()
      print(f"[{self.platform_name}] 浏览器初始化完成")

      # Parse and install the cookies.
      cookie_list = self.parse_cookies(cookies)
      print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
      await self.set_cookies(cookie_list)

      if not self.page:
          raise Exception("Page not initialized")

      # Make sure the video file exists.
      if not os.path.exists(params.video_path):
          raise Exception(f"视频文件不存在: {params.video_path}")
      print(f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")

      self.report_progress(10, "正在打开上传页面...")

      # Open the upload page.
      await self.page.goto("https://creator.douyin.com/creator-micro/content/upload")
      print(f"[{self.platform_name}] 等待页面加载...")
      try:
          await self.page.wait_for_url("https://creator.douyin.com/creator-micro/content/upload", timeout=30000)
      except Exception:
          # A redirect (e.g. to login) is handled below via the URL check.
          pass
      await asyncio.sleep(3)

      # Inspect where we ended up.
      current_url = self.page.url
      print(f"[{self.platform_name}] 当前 URL: {current_url}")

      async def wait_for_manual_login(timeout_seconds: int = 300) -> bool:
          """Poll the page URL until the user has logged in or the timeout expires."""
          if not self.page:
              return False
          self.report_progress(8, "检测到需要登录,请在浏览器窗口完成登录...")
          try:
              await self.page.bring_to_front()
          except Exception:
              pass
          waited = 0
          while waited < timeout_seconds:
              try:
                  url = self.page.url
                  if "login" not in url and "passport" not in url:
                      if "creator.douyin.com" in url:
                          return True
                  await asyncio.sleep(2)
                  waited += 2
              except Exception:
                  await asyncio.sleep(2)
                  waited += 2
          return False

      # Landed on a login page: cookies are missing or expired.
      if "login" in current_url or "passport" in current_url:
          if not self.headless:
              logged_in = await wait_for_manual_login()
              if logged_in:
                  # Best effort: hand the fresh cookies back to the Node side.
                  try:
                      if self.context:
                          cookies_after = await self.context.cookies()
                          await self.sync_cookies_to_node(cookies_after)
                  except Exception:
                      pass
                  await self.page.goto("https://creator.douyin.com/creator-micro/content/upload")
                  await asyncio.sleep(3)
                  current_url = self.page.url
              else:
                  screenshot_base64 = await self.capture_screenshot()
                  return PublishResult(
                      success=False,
                      platform=self.platform_name,
                      error="需要登录:请在浏览器窗口完成登录后重试",
                      need_captcha=True,
                      captcha_type='login',
                      screenshot_base64=screenshot_base64,
                      page_url=current_url,
                      status='need_captcha'
                  )
          else:
              screenshot_base64 = await self.capture_screenshot()
              return PublishResult(
                  success=False,
                  platform=self.platform_name,
                  error="Cookie 已过期,需要重新登录",
                  need_captcha=True,
                  captcha_type='login',
                  screenshot_base64=screenshot_base64,
                  page_url=current_url,
                  status='need_captcha'
              )

      # AI-based captcha detection.
      ai_captcha_result = await self.ai_check_captcha()
      if ai_captcha_result['has_captcha']:
          print(f"[{self.platform_name}] AI检测到验证码: {ai_captcha_result['captcha_type']}", flush=True)
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error=f"检测到{ai_captcha_result['captcha_type']}验证码,需要使用有头浏览器完成验证",
              need_captcha=True,
              captcha_type=ai_captcha_result['captcha_type'],
              screenshot_base64=screenshot_base64,
              page_url=current_url,
              status='need_captcha'
          )

      # Selector-based captcha detection.
      captcha_result = await self.check_captcha()
      if captcha_result['need_captcha']:
          print(f"[{self.platform_name}] 传统方式检测到验证码: {captcha_result['captcha_type']}", flush=True)
          if captcha_result['captcha_type'] == 'phone':
              handled = await self.handle_phone_captcha()
              if handled:
                  self.report_progress(12, "短信验证码已处理,继续发布...")
              else:
                  screenshot_base64 = await self.capture_screenshot()
                  return PublishResult(
                      success=False,
                      platform=self.platform_name,
                      error="检测到手机验证码,但自动处理失败",
                      need_captcha=True,
                      captcha_type='phone',
                      screenshot_base64=screenshot_base64,
                      page_url=current_url,
                      status='need_captcha'
                  )
          else:
              screenshot_base64 = await self.capture_screenshot()
              return PublishResult(
                  success=False,
                  platform=self.platform_name,
                  error=f"需要{captcha_result['captcha_type']}验证码,请使用有头浏览器完成验证",
                  need_captcha=True,
                  captcha_type=captcha_result['captcha_type'],
                  screenshot_base64=screenshot_base64,
                  page_url=current_url,
                  status='need_captcha'
              )

      self.report_progress(15, "正在选择视频文件...")

      # Click the upload area; selectors go from most to least specific.
      upload_selectors = [
          "div[class*='container-drag-info']",
          "div[class*='container-drag']",
          "div.upload-btn",
          "div[class*='upload']",
      ]
      upload_success = False
      for selector in upload_selectors:
          try:
              upload_div = self.page.locator(selector).first
              if await upload_div.count() > 0:
                  print(f"[{self.platform_name}] 找到上传区域: {selector}")
                  async with self.page.expect_file_chooser(timeout=10000) as fc_info:
                      await upload_div.click()
                  file_chooser = await fc_info.value
                  await file_chooser.set_files(params.video_path)
                  upload_success = True
                  print(f"[{self.platform_name}] 视频文件已选择")
                  break
          except Exception as e:
              print(f"[{self.platform_name}] 选择器 {selector} 失败: {e}")

      if not upload_success:
          screenshot_base64 = await self.capture_screenshot()
          return PublishResult(
              success=False,
              platform=self.platform_name,
              error="未找到上传入口",
              screenshot_base64=screenshot_base64,
              page_url=await self.get_page_url(),
              status='failed'
          )

      # Wait for the redirect to the post-editing page.
      self.report_progress(20, "等待进入发布页面...")
      for i in range(60):
          try:
              await self.page.wait_for_url(
                  "https://creator.douyin.com/creator-micro/content/post/video*",
                  timeout=2000
              )
              print(f"[{self.platform_name}] 已进入发布页面")
              break
          except Exception:
              print(f"[{self.platform_name}] 等待进入发布页面... {i+1}/60")
              await asyncio.sleep(1)
      await asyncio.sleep(2)

      self.report_progress(30, "正在填充标题和话题...")

      # Fill in the title (the dedicated input receives at most 30 characters).
      title_input = self.page.get_by_text('作品标题').locator("..").locator(
          "xpath=following-sibling::div[1]").locator("input")
      if await title_input.count():
          await title_input.fill(params.title[:30])
          print(f"[{self.platform_name}] 标题已填写")
      else:
          # Fallback: type the title into the rich-text editor.
          title_container = self.page.locator(".notranslate")
          await title_container.click()
          await self.page.keyboard.press("Backspace")
          await self.page.keyboard.press("Control+KeyA")
          await self.page.keyboard.press("Delete")
          await self.page.keyboard.type(params.title)
          await self.page.keyboard.press("Enter")
          print(f"[{self.platform_name}] 标题已填写(备用方式)")

      # Add topic tags.
      if params.tags:
          css_selector = ".zone-container"
          for index, tag in enumerate(params.tags, start=1):
              print(f"[{self.platform_name}] 正在添加第{index}个话题: #{tag}")
              await self.page.type(css_selector, "#" + tag)
              await self.page.press(css_selector, "Space")

      self.report_progress(40, "等待视频上传完成...")

      # The upload is considered finished once a "re-upload" control appears.
      for i in range(120):
          try:
              count = await self.page.locator("div").filter(has_text="重新上传").count()
              if count > 0:
                  print(f"[{self.platform_name}] 视频上传完毕")
                  break
              else:
                  print(f"[{self.platform_name}] 正在上传视频中... {i+1}/120")
                  # Retry when the page reports an upload failure.
                  if await self.page.locator('div.progress-div > div:has-text("上传失败")').count():
                      print(f"[{self.platform_name}] 发现上传出错了,重新上传...")
                      await self.handle_upload_error(params.video_path)
                  await asyncio.sleep(3)
          except Exception:
              print(f"[{self.platform_name}] 正在上传视频中...")
              await asyncio.sleep(3)

      self.report_progress(60, "处理视频设置...")

      # Dismiss the "got it" popup if present.
      known_count = await self.page.get_by_role("button", name="我知道了").count()
      if known_count > 0:
          await self.page.get_by_role("button", name="我知道了").nth(0).click()
          print(f"[{self.platform_name}] 关闭弹窗")
      await asyncio.sleep(5)

      # Set the location only when one was requested; with an empty value
      # there is nothing to type and the first suggestion would be arbitrary.
      if params.location:
          try:
              await self.page.locator('div.semi-select span:has-text("输入地理位置")').click()
              await asyncio.sleep(1)
              await self.page.keyboard.press("Backspace")
              await self.page.keyboard.press("Control+KeyA")
              await self.page.keyboard.press("Delete")
              await self.page.keyboard.type(params.location)
              await asyncio.sleep(1)
              await self.page.locator('div[role="listbox"] [role="option"]').first.click()
              print(f"[{self.platform_name}] 位置设置成功: {params.location}")
          except Exception as e:
              print(f"[{self.platform_name}] 设置位置失败: {e}")

      # Turn on Toutiao/Xigua sync when the switch exists and is off (best effort).
      try:
          third_part_element = '[class^="info"] > [class^="first-part"] div div.semi-switch'
          if await self.page.locator(third_part_element).count():
              class_name = await self.page.eval_on_selector(
                  third_part_element, 'div => div.className')
              if 'semi-switch-checked' not in class_name:
                  await self.page.locator(third_part_element).locator(
                      'input.semi-switch-native-control').click()
                  print(f"[{self.platform_name}] 已开启头条/西瓜同步")
      except Exception:
          pass

      # Scheduled publishing.
      if params.publish_date:
          self.report_progress(70, "设置定时发布...")
          await self.set_schedule_time(params.publish_date)

      self.report_progress(80, "正在发布...")
      print(f"[{self.platform_name}] 查找发布按钮...")

      # Click "publish" and poll for the redirect to the management page.
      for i in range(30):
          try:
              # The AI captcha check is slow, so only run it every 5th iteration.
              if i % 5 == 0:
                  ai_captcha = await self.ai_check_captcha()
                  if ai_captcha['has_captcha']:
                      print(f"[{self.platform_name}] AI检测到发布过程中需要验证码: {ai_captcha['captcha_type']}", flush=True)
                      if ai_captcha['captcha_type'] == 'phone':
                          handled = await self.handle_phone_captcha()
                          if handled:
                              continue
                      screenshot_base64 = await self.capture_screenshot()
                      page_url = await self.get_page_url()
                      return PublishResult(
                          success=False,
                          platform=self.platform_name,
                          error=f"发布过程中需要{ai_captcha['captcha_type']}验证码,请使用有头浏览器完成验证",
                          need_captcha=True,
                          captcha_type=ai_captcha['captcha_type'],
                          screenshot_base64=screenshot_base64,
                          page_url=page_url,
                          status='need_captcha'
                      )

              publish_btn = self.page.get_by_role('button', name="发布", exact=True)
              btn_count = await publish_btn.count()
              if btn_count > 0:
                  print(f"[{self.platform_name}] 点击发布按钮...")
                  await publish_btn.click()

              # Success is signalled by the redirect to the management page.
              await self.page.wait_for_url(
                  "https://creator.douyin.com/creator-micro/content/manage",
                  timeout=5000
              )
              self.report_progress(100, "发布成功")
              print(f"[{self.platform_name}] 视频发布成功!")
              screenshot_base64 = await self.capture_screenshot()
              page_url = await self.get_page_url()
              return PublishResult(
                  success=True,
                  platform=self.platform_name,
                  message="发布成功",
                  screenshot_base64=screenshot_base64,
                  page_url=page_url,
                  status='success'
              )
          except Exception:
              current_url = self.page.url
              # The redirect may have happened even though the wait raised.
              if "https://creator.douyin.com/creator-micro/content/manage" in current_url:
                  self.report_progress(100, "发布成功")
                  print(f"[{self.platform_name}] 视频发布成功!")
                  screenshot_base64 = await self.capture_screenshot()
                  return PublishResult(
                      success=True,
                      platform=self.platform_name,
                      message="发布成功",
                      screenshot_base64=screenshot_base64,
                      page_url=current_url,
                      status='success'
                  )
              else:
                  print(f"[{self.platform_name}] 视频正在发布中... {i+1}/30, URL: {current_url}")
                  await asyncio.sleep(1)

      # Publish timed out.
      print(f"[{self.platform_name}] 发布超时,获取截图...")
      screenshot_base64 = await self.capture_screenshot()
      page_url = await self.get_page_url()
      return PublishResult(
          success=False,
          platform=self.platform_name,
          error="发布超时,请检查发布状态",
          screenshot_base64=screenshot_base64,
          page_url=page_url,
          status='need_action'
      )
  async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
      """Fetch one page of the account's Douyin works via the creator-center work_list API.

      The request is issued with ``fetch`` from inside the logged-in browser page
      so that the session cookies are sent along with it.

      Args:
          cookies: Cookie string or JSON, handed to ``self.parse_cookies``.
          page: Pagination cursor sent as the API's ``max_cursor``. Pass 0 for the
              first request and the previously returned ``next_page`` afterwards.
          page_size: Number of works requested per page (the API's ``count``).

      Returns:
          WorksResult: On success carries ``works``, ``total``, ``has_more`` and
          ``next_page`` (the cursor for the following request). On any failure,
          including an error reported by the in-page fetch, ``success`` is False
          and ``error`` holds the message.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品列表")
      print(f"[{self.platform_name}] cursor={page}, page_size={page_size}")
      print(f"{'='*60}")

      works: List[WorkItem] = []
      total = 0
      has_more = False
      next_cursor = 0

      def first_url(image) -> str:
          """Return the first URL of an image dict, or '' when it is missing or empty."""
          return ((image or {}).get('url_list') or [''])[0]

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          # Open the creator-center home page first so the login state can be checked.
          await self.page.goto("https://creator.douyin.com/creator-micro/home")
          await asyncio.sleep(3)

          # Landing on a login/passport URL means the cookies are no longer accepted.
          current_url = self.page.url
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          # `page` doubles as the API's max_cursor (0 first, then the returned cursor).
          max_cursor = page
          api_url = f"https://creator.douyin.com/janus/douyin/creator/pc/work_list?status=0&device_platform=android&count={page_size}&max_cursor={max_cursor}&cookie_enabled=true&browser_language=zh-CN&browser_platform=Win32&browser_name=Mozilla&browser_online=true&timezone_name=Asia%2FShanghai"

          # The URL is passed as an argument rather than spliced into the JavaScript
          # source, so its content can never be interpreted as script.
          response = await self.page.evaluate(
              '''
              async (url) => {
                  try {
                      const resp = await fetch(url, {
                          credentials: 'include',
                          headers: { 'Accept': 'application/json' }
                      });
                      return await resp.json();
                  } catch (e) {
                      return { error: e.toString() };
                  }
              }
              ''',
              api_url,
          )

          if not isinstance(response, dict):
              raise Exception("作品列表 API 返回格式异常")
          if response.get('error'):
              # A failed request must not be reported as "success with zero works".
              print(f"[{self.platform_name}] API 请求失败: {response.get('error')}", flush=True)
              raise Exception(f"API 请求失败: {response.get('error')}")

          aweme_list = response.get('aweme_list') or []
          has_more = response.get('has_more', False)

          # Next-page cursor: prefer max_cursor, fall back to next_cursor.
          next_cursor = response.get('max_cursor') if 'max_cursor' in response else response.get('next_cursor')
          if next_cursor is None:
              next_cursor = 0

          # The first work's author.aweme_count is used as the total number of works.
          if aweme_list:
              author_aweme_count = (aweme_list[0].get('author') or {}).get('aweme_count') or 0
              if author_aweme_count > 0:
                  total = author_aweme_count
                  print(f"[{self.platform_name}] 从 author.aweme_count 获取总作品数: {total}")

          print(f"[{self.platform_name}] API 响应: has_more={has_more}, aweme_list={len(aweme_list)}, next_cursor={next_cursor}")

          for aweme in aweme_list:
              aweme_id = str(aweme.get('aweme_id', ''))
              if not aweme_id:
                  continue

              # JSON nulls arrive as None, so every nested lookup falls back to {}.
              statistics = aweme.get('statistics') or {}
              video = aweme.get('video') or {}

              # Cover: top-level "Cover" first, then video.cover.
              cover_url = first_url(aweme.get('Cover')) or first_url(video.get('cover'))

              # Title: item_title, else the first line of desc (max 50 chars), else a placeholder.
              title = aweme.get('item_title', '') or (aweme.get('desc') or '').split('\n')[0][:50] or '无标题'

              # Duration is delivered in milliseconds; store whole seconds.
              duration = (video.get('duration') or 0) // 1000

              create_time = aweme.get('create_time', 0)
              publish_time = datetime.fromtimestamp(create_time).strftime('%Y-%m-%d %H:%M:%S') if create_time else ''

              # video_url: first play_addr URL, otherwise the public share page.
              url_list = (video.get('play_addr') or {}).get('url_list') or []
              video_url = url_list[0] if url_list else f"https://www.douyin.com/video/{aweme_id}"

              works.append(WorkItem(
                  work_id=aweme_id,
                  title=title,
                  cover_url=cover_url,
                  video_url=video_url,
                  duration=duration,
                  status='published',
                  publish_time=publish_time,
                  play_count=int(statistics.get('play_count') or 0),
                  like_count=int(statistics.get('digg_count') or 0),
                  comment_count=int(statistics.get('comment_count') or 0),
                  share_count=int(statistics.get('share_count') or 0),
                  collect_count=int(statistics.get('collect_count') or 0),
              ))

          if total == 0:
              total = len(works)
          print(f"[{self.platform_name}] 本页获取到 {len(works)} 个作品")

      except Exception as e:
          import traceback
          traceback.print_exc()
          return WorksResult(
              success=False,
              platform=self.platform_name,
              error=str(e)
          )
      finally:
          # Release the browser like the sibling fetch methods do.
          # NOTE(review): assumes close_browser() tolerates repeated calls, which
          # get_work_comments_mapping already relies on — confirm in the base class.
          await self.close_browser()

      return WorksResult(
          success=True,
          platform=self.platform_name,
          works=works,
          total=total,
          has_more=has_more,
          next_page=next_cursor
      )
  async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
      """Fetch the comments of one Douyin work by intercepting the page's comment API.

      The public video page is opened in the logged-in browser and the JSON of
      any response whose URL contains ``/comment/list`` is captured; the last
      successful capture is parsed.

      Args:
          cookies: Cookie string or JSON, handed to ``self.parse_cookies``.
          work_id: Douyin aweme id of the work.
          cursor: Accepted for interface compatibility; the current implementation
              only logs it and always returns whatever page the video page loads.

      Returns:
          CommentsResult: ``comments``, ``total`` and ``has_more`` on success, with
          the API's next cursor attached as an extra ``cursor`` attribute. When no
          comment response is captured the result is successful but empty. On an
          exception ``success`` is False and ``error`` holds the message.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品评论")
      print(f"[{self.platform_name}] work_id={work_id}, cursor={cursor}")
      print(f"{'='*60}")

      comments: List[CommentItem] = []
      total = 0
      has_more = False
      next_cursor = ""
      captured_data = {}

      def first_url(image) -> str:
          """Return the first URL of an image dict, or '' when it is missing or empty."""
          return ((image or {}).get('url_list') or [''])[0]

      def format_time(timestamp) -> str:
          """Format a unix timestamp as 'YYYY-MM-DD HH:MM:SS'; '' for a missing/zero value."""
          return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S') if timestamp else ''

      def build_item(raw: dict, **extra) -> CommentItem:
          """Convert one raw comment/reply dict into a CommentItem."""
          author = raw.get('user') or {}
          return CommentItem(
              comment_id=str(raw.get('cid', '')),
              work_id=work_id,
              content=raw.get('text', ''),
              author_id=str(author.get('uid', '')),
              author_name=author.get('nickname', ''),
              author_avatar=first_url(author.get('avatar_thumb')),
              like_count=int(raw.get('digg_count') or 0),
              create_time=format_time(raw.get('create_time')),
              is_author=raw.get('is_author', False),
              **extra,
          )

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          async def handle_response(response):
              """Remember the JSON body of comment-list responses for this work."""
              nonlocal captured_data
              url = response.url
              # Matches /aweme/v1/web/comment/list/ as well as a bare /comment/list/.
              if '/comment/list' not in url or not ('aweme_id' in url or work_id in url):
                  return
              try:
                  json_data = await response.json()
                  print(f"[{self.platform_name}] 捕获到评论 API: {url[:100]}...", flush=True)
                  # Keep the payload only when the API reports success or carries comments.
                  if json_data.get('status_code') == 0 or json_data.get('comments'):
                      captured_data = json_data
                      comment_count = len(json_data.get('comments') or [])
                      print(f"[{self.platform_name}] 评论 API 响应成功: comments={comment_count}, has_more={json_data.get('has_more')}", flush=True)
              except Exception as e:
                  print(f"[{self.platform_name}] 解析评论响应失败: {e}", flush=True)

          self.page.on('response', handle_response)
          print(f"[{self.platform_name}] 已注册评论 API 响应监听器", flush=True)

          # Opening the video page makes the site request the comment list itself.
          video_url = f"https://www.douyin.com/video/{work_id}"
          print(f"[{self.platform_name}] 访问视频详情页: {video_url}", flush=True)
          await self.page.goto(video_url, wait_until="domcontentloaded", timeout=30000)
          await asyncio.sleep(5)

          # Landing on a login/passport URL means the cookies are no longer accepted.
          current_url = self.page.url
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          if not captured_data:
              print(f"[{self.platform_name}] 等待评论 API 响应...", flush=True)
              # Scroll a little to trigger lazy loading of the comment section.
              await self.page.evaluate('window.scrollBy(0, 300)')
              await asyncio.sleep(3)
          if not captured_data:
              # Give slow responses one more grace period.
              await asyncio.sleep(3)

          self.page.remove_listener('response', handle_response)

          if captured_data:
              comment_list = captured_data.get('comments') or []
              # The API may send has_more as a bool or as 0/1; normalise to bool.
              has_more = bool(captured_data.get('has_more'))
              next_cursor = str(captured_data.get('cursor', ''))
              total = captured_data.get('total', 0) or len(comment_list)
              print(f"[{self.platform_name}] 解析评论: total={total}, has_more={has_more}, comments={len(comment_list)}", flush=True)

              for comment in comment_list:
                  if not str(comment.get('cid', '')):
                      continue
                  # Inline replies are converted first so they can be nested under the comment.
                  replies = [build_item(reply) for reply in (comment.get('reply_comment') or [])]
                  comments.append(build_item(
                      comment,
                      reply_count=int(comment.get('reply_comment_total') or 0),
                      replies=replies,
                  ))
              print(f"[{self.platform_name}] 解析到 {len(comments)} 条评论", flush=True)
          else:
              print(f"[{self.platform_name}] 未捕获到评论 API 响应", flush=True)

      except Exception as e:
          import traceback
          traceback.print_exc()
          return CommentsResult(
              success=False,
              platform=self.platform_name,
              work_id=work_id,
              error=str(e)
          )
      finally:
          await self.close_browser()

      result = CommentsResult(
          success=True,
          platform=self.platform_name,
          work_id=work_id,
          comments=comments,
          total=total,
          has_more=has_more
      )
      # The next cursor is stored directly in the instance dict, exactly as before,
      # because it is not passed through the CommentsResult constructor.
      result.__dict__['cursor'] = next_cursor
      return result
  async def get_all_comments(self, cookies: str) -> dict:
      """Collect comments for the account's works through the comment-management page.

      The creator-center comment page is opened, a response listener records the
      JSON of comment-list and work-list API calls, up to ten works are clicked
      in the "选择作品" picker to trigger their comment requests, and the captured
      comments are finally grouped by work id.

      Args:
          cookies: Cookie string or JSON, handed to ``self.parse_cookies``.

      Returns:
          dict: ``{'success': True, 'platform', 'work_comments', 'total'}`` where
          ``work_comments`` is a list of ``{'work_id', 'title', 'cover_url',
          'comments'}`` and ``total`` is the number of works; on an exception
          ``{'success': False, 'platform', 'error', 'work_comments': []}``.
      """
      import re

      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取所有作品评论")
      print(f"{'='*60}")

      all_work_comments = []
      captured_comments = []
      captured_works = {}  # work_id -> work_info
      # Compiled once instead of importing/compiling inside the response handler.
      aweme_id_pattern = re.compile(r'aweme_id=(\d+)')

      def first_url(image) -> str:
          """Return the first URL of an image dict, or '' when it is missing or empty."""
          return ((image or {}).get('url_list') or [''])[0]

      try:
          await self.init_browser()
          cookie_list = self.parse_cookies(cookies)
          await self.set_cookies(cookie_list)
          if not self.page:
              raise Exception("Page not initialized")

          async def handle_response(response):
              """Record comments and work metadata from matching API responses."""
              url = response.url
              try:
                  # Comment list APIs: /comment/list/select/, /comment/read, /creator/comment/list ...
                  if '/comment/list' in url or '/comment/read' in url or 'comment_list' in url:
                      json_data = await response.json()
                      print(f"[{self.platform_name}] 捕获到评论 API: {url[:100]}...", flush=True)
                      # Two payload shapes are known: "comments" and "comment_info_list".
                      comments = json_data.get('comments') or json_data.get('comment_info_list') or []
                      if comments:
                          # The owning work is only identifiable from the request URL.
                          aweme_id_match = aweme_id_pattern.search(url)
                          aweme_id = aweme_id_match.group(1) if aweme_id_match else ''
                          for comment in comments:
                              if aweme_id and 'aweme_id' not in comment:
                                  comment['aweme_id'] = aweme_id
                              captured_comments.append(comment)
                          print(f"[{self.platform_name}] 捕获到 {len(comments)} 条评论 (aweme_id={aweme_id}),总计: {len(captured_comments)}", flush=True)

                  # Work list APIs supply title/cover for works referenced by the comments.
                  if '/work_list' in url or '/item/list' in url or '/creator/item' in url:
                      json_data = await response.json()
                      aweme_list = json_data.get('aweme_list') or json_data.get('item_info_list') or json_data.get('item_list') or []
                      print(f"[{self.platform_name}] 捕获到作品列表 API: {len(aweme_list)} 个作品", flush=True)
                      for aweme in aweme_list:
                          aweme_id = str(aweme.get('aweme_id', '') or aweme.get('item_id', '') or aweme.get('item_id_plain', ''))
                          if not aweme_id:
                              continue
                          cover_url = (
                              first_url(aweme.get('Cover'))
                              or first_url((aweme.get('video') or {}).get('cover'))
                              or aweme.get('cover_image_url')
                              or ''
                          )
                          captured_works[aweme_id] = {
                              'title': aweme.get('item_title', '') or aweme.get('title', '') or aweme.get('desc', ''),
                              'cover': cover_url,
                              'comment_count': (aweme.get('statistics') or {}).get('comment_count', 0) or aweme.get('comment_count', 0),
                          }
              except Exception as e:
                  print(f"[{self.platform_name}] 解析响应失败: {e}", flush=True)

          self.page.on('response', handle_response)
          print(f"[{self.platform_name}] 已注册 API 响应监听器", flush=True)

          print(f"[{self.platform_name}] 访问评论管理页面...", flush=True)
          await self.page.goto("https://creator.douyin.com/creator-micro/interactive/comment", wait_until="domcontentloaded", timeout=30000)
          await asyncio.sleep(5)

          # Landing on a login/passport URL means the cookies are no longer accepted.
          current_url = self.page.url
          if "login" in current_url or "passport" in current_url:
              raise Exception("Cookie 已过期,请重新登录")

          print(f"[{self.platform_name}] 页面加载完成,当前捕获: {len(captured_comments)} 条评论, {len(captured_works)} 个作品", flush=True)

          # Best effort: open the work picker and click works to load their comments.
          try:
              select_btn = await self.page.query_selector('text="选择作品"')
              if select_btn:
                  print(f"[{self.platform_name}] 点击选择作品按钮...", flush=True)
                  await select_btn.click()
                  await asyncio.sleep(3)

                  work_items = await self.page.query_selector_all('[class*="work-item"], [class*="video-item"], [class*="aweme-item"]')
                  print(f"[{self.platform_name}] 找到 {len(work_items)} 个作品元素", flush=True)

                  # At most 10 works are processed.
                  for i, item in enumerate(work_items[:10]):
                      try:
                          await item.click()
                          await asyncio.sleep(2)
                          print(f"[{self.platform_name}] 已点击作品 {i+1}/{min(len(work_items), 10)}", flush=True)
                      except Exception:
                          # A single unclickable item is skipped; unlike a bare
                          # ``except`` this lets task cancellation propagate.
                          pass

                  # Close the picker dialog again.
                  close_btn = await self.page.query_selector('[class*="close"], [class*="cancel"]')
                  if close_btn:
                      await close_btn.click()
                      await asyncio.sleep(1)
          except Exception as e:
              print(f"[{self.platform_name}] 选择作品操作失败: {e}", flush=True)

          # Scroll to trigger loading of further comments.
          for _ in range(5):
              await self.page.evaluate('window.scrollBy(0, 500)')
              await asyncio.sleep(1)
          await asyncio.sleep(3)

          self.page.remove_listener('response', handle_response)
          print(f"[{self.platform_name}] 最终捕获: {len(captured_comments)} 条评论, {len(captured_works)} 个作品", flush=True)

          # Group the captured comments by work.
          work_comments_map = {}  # work_id -> grouped entry
          for comment in captured_comments:
              aweme = comment.get('aweme') or comment.get('item') or {}
              aweme_id = str(comment.get('aweme_id', '') or aweme.get('aweme_id', '') or aweme.get('item_id', ''))
              if not aweme_id:
                  continue

              if aweme_id not in work_comments_map:
                  work_info = captured_works.get(aweme_id, {})
                  work_comments_map[aweme_id] = {
                      'work_id': aweme_id,
                      'title': aweme.get('title', '') or aweme.get('desc', '') or work_info.get('title', ''),
                      'cover_url': first_url(aweme.get('cover')) if aweme.get('cover') else work_info.get('cover', ''),
                      'comments': []
                  }

              cid = str(comment.get('cid', ''))
              if not cid:
                  continue

              user = comment.get('user') or {}
              create_time = comment.get('create_time')
              work_comments_map[aweme_id]['comments'].append({
                  'comment_id': cid,
                  'author_id': str(user.get('uid', '')),
                  'author_name': user.get('nickname', ''),
                  'author_avatar': first_url(user.get('avatar_thumb')),
                  'content': comment.get('text', ''),
                  'like_count': int(comment.get('digg_count') or 0),
                  'create_time': datetime.fromtimestamp(create_time).strftime('%Y-%m-%d %H:%M:%S') if create_time else '',
                  'is_author': comment.get('is_author', False),
              })

          all_work_comments = list(work_comments_map.values())
          total_comments = sum(len(w['comments']) for w in all_work_comments)
          print(f"[{self.platform_name}] 获取到 {len(all_work_comments)} 个作品的 {total_comments} 条评论", flush=True)

      except Exception as e:
          import traceback
          traceback.print_exc()
          return {
              'success': False,
              'platform': self.platform_name,
              'error': str(e),
              'work_comments': []
          }
      finally:
          await self.close_browser()

      return {
          'success': True,
          'platform': self.platform_name,
          'work_comments': all_work_comments,
          'total': len(all_work_comments)
      }
  async def auto_reply_private_messages(self, cookies: str) -> dict:
      """Walk through the Douyin private-message tabs and answer open conversations.

      For each of the two tabs ("陌生人私信", "朋友私信") every listed session is
      opened. A session is skipped when its preview looks like a shared
      video/image/link, when the last chat message is our own, or when no text
      history can be extracted. Otherwise a reply is generated (AI first, rule
      based as fallback), typed into the input box and sent.

      Args:
          cookies: Cookie string or JSON, handed to ``self.parse_cookies``.

      Returns:
          dict: ``{'success': True, 'platform', 'replied_count', 'message'}`` or,
          on an exception, ``{'success': False, 'platform', 'error'}``.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 开始自动回复抖音私信")
      print(f"{'='*60}")

      try:
          await self.init_browser()
          await self.set_cookies(self.parse_cookies(cookies))
          if not self.page:
              raise Exception("Page not initialized")

          # Open the private-message page of the creator center.
          await self.page.goto("https://creator.douyin.com/creator-micro/data/following/chat", timeout=30000)
          await asyncio.sleep(3)

          # Landing on a login/passport URL means the cookies are no longer accepted.
          landed_url = self.page.url
          print(f"[{self.platform_name}] 当前 URL: {landed_url}")
          if "login" in landed_url or "passport" in landed_url:
              raise Exception("Cookie 已过期,请重新登录")

          replied_count = 0

          # Two tabs are handled in turn: strangers' messages, then friends' messages.
          for tab_name in ("陌生人私信", "朋友私信"):
              print(f"\n{'='*50}")
              print(f"[{self.platform_name}] 处理 {tab_name} ...")
              print(f"{'='*50}")

              tab_locator = self.page.locator(f'div.semi-tabs-tab:text-is("{tab_name}")')
              if await tab_locator.count() == 0:
                  print(f"⚠️ 未找到 {tab_name} 标签,跳过")
                  continue
              await tab_locator.click()
              await asyncio.sleep(2)

              session_count = await self.page.locator('.semi-list-item').count()
              print(f"[{self.platform_name}] {tab_name} 共找到 {session_count} 条会话")
              if session_count == 0:
                  print(f"[{self.platform_name}] {tab_name} 无新私信")
                  continue

              for idx in range(session_count):
                  try:
                      # Re-query the list on every pass because the DOM may have changed.
                      live_sessions = self.page.locator('.semi-list-item')
                      if idx >= await live_sessions.count():
                          break
                      session = live_sessions.nth(idx)

                      user_name = await session.locator('.item-header-name-vL_79m').inner_text()
                      preview = await session.locator('.text-whxV9A').inner_text()
                      print(f"\n ➤ [{idx+1}/{session_count}] 处理用户: {user_name} | 最后消息: {preview[:30]}...")

                      # Previews that announce a shared video/image/link are not text messages.
                      if "分享" in preview and any(kind in preview for kind in ("视频", "图片", "链接")):
                          print(" ➤ 会话预览为非文字消息,跳过")
                          continue

                      # Open the conversation.
                      await session.click()
                      await asyncio.sleep(2)

                      # Chat bubbles, excluding the timestamp rows.
                      bubbles = self.page.locator('.box-item-dSA1TJ:not(.time-Za5gKL)')
                      bubble_total = await bubbles.count()
                      if bubble_total > 0:
                          # A class containing "is-me-" marks a message sent by ourselves.
                          last_classes = await bubbles.nth(bubble_total - 1).get_attribute('class') or ''
                          if 'is-me-' in last_classes:
                              print(" ➤ 最后一条是我发的,跳过")
                              continue

                      chat_history = await self._extract_chat_history()
                      if not chat_history:
                          print(" ➤ 聊天历史为空,跳过")
                          continue

                      # AI reply first; the rule-based generator covers an empty AI answer.
                      reply_text = await self._generate_reply_with_ai(chat_history) or self._generate_reply(chat_history)
                      if not reply_text:
                          print(" ➤ 无需回复")
                          continue

                      print(f" 📝 回复内容: {reply_text}")
                      input_box = self.page.locator('div.chat-input-dccKiL[contenteditable="true"]')
                      send_btn = self.page.locator('button:has-text("发送")')
                      if not (await input_box.is_visible() and await send_btn.is_visible()):
                          print(" ❌ 输入框或发送按钮不可见")
                          continue

                      await input_box.fill(reply_text)
                      await asyncio.sleep(0.5)
                      await send_btn.click()
                      print(" ✅ 已发送")
                      replied_count += 1
                      await asyncio.sleep(2)
                  except Exception as e:
                      # One broken session must not stop the remaining ones.
                      print(f" ❌ 处理会话 {idx+1} 时出错: {e}")
                      continue

          print(f"[{self.platform_name}] 自动回复完成,共回复 {replied_count} 条消息")
          return {
              'success': True,
              'platform': self.platform_name,
              'replied_count': replied_count,
              'message': f'成功回复 {replied_count} 条私信'
          }
      except Exception as e:
          import traceback
          traceback.print_exc()
          return {
              'success': False,
              'platform': self.platform_name,
              'error': str(e)
          }
      finally:
          await self.close_browser()
  7824. # 辅助方法保持兼容(可复用)
  7825. def _generate_reply(self, chat_history: list) -> str:
  7826. """规则回复"""
  7827. if not chat_history:
  7828. return "你好!感谢联系~"
  7829. last_msg = chat_history[-1]["content"]
  7830. if "谢谢" in last_msg or "感谢" in last_msg:
  7831. return "不客气!欢迎常来交流~"
  7832. elif "你好" in last_msg or "在吗" in last_msg:
  7833. return "你好!请问有什么可以帮您的?"
  7834. elif "视频" in last_msg or "怎么拍" in last_msg:
  7835. return "视频是用手机拍摄的,注意光线和稳定哦!"
  7836. else:
  7837. return "收到!我会认真阅读您的留言~"
  7838. async def _extract_chat_history(self) -> list:
  7839. """精准提取聊天记录,区分作者(自己)和用户"""
  7840. if not self.page:
  7841. return []
  7842. history = []
  7843. # 获取所有聊天消息(排除时间戳元素)
  7844. message_wrappers = self.page.locator('.box-item-dSA1TJ:not(.time-Za5gKL)')
  7845. count = await message_wrappers.count()
  7846. for i in range(count):
  7847. try:
  7848. wrapper = message_wrappers.nth(i)
  7849. # 检查是否为自己发送的消息
  7850. classes = await wrapper.get_attribute('class') or ''
  7851. is_author = 'is-me-' in classes # 包含 is-me- 表示是自己发的
  7852. # 获取消息文本内容
  7853. text_element = wrapper.locator('.text-X2d7fS')
  7854. if await text_element.count() > 0:
  7855. content = await text_element.inner_text()
  7856. content = content.strip()
  7857. if content: # 只添加非空消息
  7858. # 获取用户名(如果是对方消息)
  7859. author_name = ''
  7860. if not is_author:
  7861. # 尝试获取对方用户名
  7862. name_elements = wrapper.locator('.aweme-author-name-m8uoXU')
  7863. if await name_elements.count() > 0:
  7864. author_name = await name_elements.nth(0).inner_text()
  7865. else:
  7866. author_name = '用户'
  7867. else:
  7868. author_name = '我'
  7869. history.append({
  7870. "author": author_name,
  7871. "content": content,
  7872. "is_author": is_author,
  7873. })
  7874. except Exception as e:
  7875. print(f" ⚠️ 解析第 {i+1} 条消息失败: {e}")
  7876. continue
  7877. return history
  7878. async def _generate_reply_with_ai(self, chat_history: list) -> str:
  7879. """使用 AI 生成回复(保留原逻辑)"""
  7880. import os, requests, json
  7881. try:
  7882. ai_api_key = os.environ.get('DASHSCOPE_API_KEY', '')
  7883. ai_base_url = os.environ.get('DASHSCOPE_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
  7884. ai_model = os.environ.get('AI_MODEL', 'qwen-plus')
  7885. if not ai_api_key:
  7886. return self._generate_reply(chat_history)
  7887. messages = [{"role": "system", "content": "你是一个友好的抖音创作者助手,负责回复粉丝私信。请保持简洁、友好、专业的语气。回复长度不超过20字。"}]
  7888. for msg in chat_history:
  7889. role = "assistant" if msg.get("is_author", False) else "user"
  7890. messages.append({"role": role, "content": msg["content"]})
  7891. headers = {'Authorization': f'Bearer {ai_api_key}', 'Content-Type': 'application/json'}
  7892. payload = {"model": ai_model, "messages": messages, "max_tokens": 150, "temperature": 0.8}
  7893. response = requests.post(f"{ai_base_url}/chat/completions", headers=headers, json=payload, timeout=30)
  7894. if response.status_code == 200:
  7895. ai_reply = response.json().get('choices', [{}])[0].get('message', {}).get('content', '').strip()
  7896. return ai_reply if ai_reply else self._generate_reply(chat_history)
  7897. else:
  7898. return self._generate_reply(chat_history)
  7899. except:
  7900. return self._generate_reply(chat_history)
  async def get_work_comments_mapping(self, cookies: str) -> dict:
      """Build a list pairing each work of the first page with its comments.

      The creator-center home and content-management pages are visited, the
      first 20 works are fetched with ``get_works`` and ``get_comments`` is then
      called once per work, pausing two seconds between works.

      NOTE(review): ``get_works`` and ``get_comments`` each call ``init_browser``
      themselves while the session opened here is still active — confirm that
      ``init_browser`` copes with being called repeatedly.

      Args:
          cookies: Cookies of the Douyin creator platform.

      Returns:
          dict: ``{'success': True, 'platform', 'work_comments', 'summary'}`` where
          each ``work_comments`` entry holds ``work_info``, ``comments`` and, when
          fetching that work's comments failed, ``error``; on failure
          ``{'success': False, 'platform', 'error', 'work_comments': []}``.
      """
      print(f"\n{'='*60}")
      print(f"[{self.platform_name}] 获取作品和评论对应关系")
      print(f"{'='*60}")

      def failure(reason) -> dict:
          """Shape of every unsuccessful result."""
          return {
              'success': False,
              'platform': self.platform_name,
              'error': reason,
              'work_comments': []
          }

      work_comments_mapping = []
      try:
          await self.init_browser()
          await self.set_cookies(self.parse_cookies(cookies))
          if not self.page:
              raise Exception("Page not initialized")

          # Creator-center home page first, to check the login state.
          await self.page.goto("https://creator.douyin.com/creator-micro/home", timeout=30000)
          await asyncio.sleep(3)
          landed_url = self.page.url
          if "login" in landed_url or "passport" in landed_url:
              raise Exception("Cookie 已过期,请重新登录")

          # Then the content-management page.
          print(f"[{self.platform_name}] 访问内容管理页面...")
          await self.page.goto("https://creator.douyin.com/creator-micro/content/manage", timeout=30000)
          await asyncio.sleep(5)

          works_result = await self.get_works(cookies, page=0, page_size=20)
          if not works_result.success:
              print(f"[{self.platform_name}] 获取作品列表失败: {works_result.error}")
              return failure(works_result.error)

          print(f"[{self.platform_name}] 获取到 {len(works_result.works)} 个作品")

          # Fetch the comments work by work.
          for position, work in enumerate(works_result.works, start=1):
              print(f"[{self.platform_name}] 正在获取作品 {position}/{len(works_result.works)} 的评论: {work.title[:20]}...")
              comments_result = await self.get_comments(cookies, work.work_id)

              if not comments_result.success:
                  print(f"[{self.platform_name}] 获取作品 '{work.title[:20]}...' 评论失败: {comments_result.error}")
                  work_comments_mapping.append({
                      'work_info': work.to_dict(),
                      'comments': [],
                      'error': comments_result.error
                  })
              else:
                  work_comments_mapping.append({
                      'work_info': work.to_dict(),
                      'comments': [comment.to_dict() for comment in comments_result.comments]
                  })
                  print(f"[{self.platform_name}] 作品 '{work.title[:20]}...' 获取到 {len(comments_result.comments)} 条评论")

              # Pause between works to avoid hitting the site too frequently.
              await asyncio.sleep(2)

          print(f"[{self.platform_name}] 所有作品评论获取完成")
      except Exception as e:
          import traceback
          traceback.print_exc()
          return failure(str(e))
      finally:
          await self.close_browser()

      comment_total = sum(len(item['comments']) for item in work_comments_mapping)
      return {
          'success': True,
          'platform': self.platform_name,
          'work_comments': work_comments_mapping,
          'summary': {
              'total_works': len(work_comments_mapping),
              'total_comments': comment_total,
          }
      }
  7981. ================================================================================
  7982. 文件: server\python\platforms\kuaishou.py
  7983. ================================================================================
  7984. # -*- coding: utf-8 -*-
  7985. """
  7986. 快手视频发布器
  7987. 参考: matrix/ks_uploader/main.py
  7988. """
  7989. import asyncio
  7990. import os
  7991. from datetime import datetime
  7992. from typing import List
  7993. from .base import (
  7994. BasePublisher, PublishParams, PublishResult,
  7995. WorkItem, WorksResult, CommentItem, CommentsResult
  7996. )
  7997. class KuaishouPublisher(BasePublisher):
  7998. """
  7999. 快手视频发布器
  8000. 使用 Playwright 自动化操作快手创作者中心
  8001. """
  8002. platform_name = "kuaishou"
  8003. login_url = "https://cp.kuaishou.com/"
  8004. publish_url = "https://cp.kuaishou.com/article/publish/video"
  8005. cookie_domain = ".kuaishou.com"
  async def set_schedule_time(self, publish_date: datetime):
      """Switch the publish form to scheduled publishing and type the target time.

      Does nothing when no page is available.

      Args:
          publish_date: Moment at which the video should go live; it is entered
              in ``YYYY-MM-DD HH:MM`` format.
      """
      page = self.page
      if not page:
          return

      # Pick the "定时发布" (scheduled publish) radio option.
      await page.locator("label.radio--4Gpx6:has-text('定时发布')").click()
      await asyncio.sleep(1)

      # Focus the date-time field, select its current content and overwrite it.
      formatted = publish_date.strftime("%Y-%m-%d %H:%M")
      await page.locator('.semi-input[placeholder="日期和时间"]').click()
      keyboard = page.keyboard
      await keyboard.press("Control+KeyA")
      await keyboard.type(str(formatted))
      await keyboard.press("Enter")
      await asyncio.sleep(1)
  async def upload_cover(self, cover_path: str):
      """Upload a custom cover image through the "编辑封面" dialog.

      Returns immediately when there is no page, no path, or the file does not
      exist. Any failure while driving the dialog is logged and swallowed.

      Args:
          cover_path: Local path of the cover image.
      """
      if not (self.page and cover_path and os.path.exists(cover_path)):
          return

      page = self.page
      try:
          # Open the cover editor and switch to its upload tab.
          await page.get_by_role("button", name="编辑封面").click()
          await asyncio.sleep(1)
          await page.get_by_role("tab", name="上传封面").click()

          # Clicking the second div inside the upload panel opens the file chooser.
          drop_zone = page.get_by_role("tabpanel", name="上传封面").locator("div").nth(1)
          async with page.expect_file_chooser() as chooser_info:
              await drop_zone.click()
          chooser = await chooser_info.value
          await chooser.set_files(cover_path)

          await page.get_by_role("button", name="确认").click()
          await asyncio.sleep(3)
          print(f"[{self.platform_name}] 封面上传成功")
      except Exception as e:
          print(f"[{self.platform_name}] 封面上传失败: {e}")
  8039. async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
  8040. """发布视频到快手 - 参考 matrix/ks_uploader/main.py"""
  8041. print(f"\n{'='*60}")
  8042. print(f"[{self.platform_name}] 开始发布视频")
  8043. print(f"[{self.platform_name}] 视频路径: {params.video_path}")
  8044. print(f"[{self.platform_name}] 标题: {params.title}")
  8045. print(f"[{self.platform_name}] Headless: {self.headless}")
  8046. print(f"{'='*60}")
  8047. self.report_progress(5, "正在初始化浏览器...")
  8048. # 初始化浏览器
  8049. await self.init_browser()
  8050. print(f"[{self.platform_name}] 浏览器初始化完成")
  8051. # 解析并设置 cookies
  8052. cookie_list = self.parse_cookies(cookies)
  8053. print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
  8054. await self.set_cookies(cookie_list)
  8055. if not self.page:
  8056. raise Exception("Page not initialized")
  8057. # 检查视频文件
  8058. if not os.path.exists(params.video_path):
  8059. raise Exception(f"视频文件不存在: {params.video_path}")
  8060. print(f"[{self.platform_name}] 视频文件存在,大小: {os.path.getsize(params.video_path)} bytes")
  8061. self.report_progress(10, "正在打开上传页面...")
  8062. # 访问上传页面 - 参考 matrix
  8063. await self.page.goto("https://cp.kuaishou.com/article/publish/video")
  8064. print(f"[{self.platform_name}] 等待页面加载...")
  8065. try:
  8066. await self.page.wait_for_url("https://cp.kuaishou.com/article/publish/video", timeout=30000)
  8067. except:
  8068. pass
  8069. await asyncio.sleep(3)
  8070. # 检查是否跳转到登录页
  8071. current_url = self.page.url
  8072. print(f"[{self.platform_name}] 当前 URL: {current_url}")
  8073. if "passport" in current_url or "login" in current_url:
  8074. screenshot_base64 = await self.capture_screenshot()
  8075. return PublishResult(
  8076. success=False,
  8077. platform=self.platform_name,
  8078. error="Cookie 已过期,需要重新登录",
  8079. need_captcha=True,
  8080. captcha_type='login',
  8081. screenshot_base64=screenshot_base64,
  8082. page_url=current_url,
  8083. status='need_captcha'
  8084. )
  8085. # 使用 AI 检查验证码
  8086. ai_captcha = await self.ai_check_captcha()
  8087. if ai_captcha['has_captcha']:
  8088. print(f"[{self.platform_name}] AI检测到验证码: {ai_captcha['captcha_type']}", flush=True)
  8089. screenshot_base64 = await self.capture_screenshot()
  8090. return PublishResult(
  8091. success=False,
  8092. platform=self.platform_name,
  8093. error=f"检测到{ai_captcha['captcha_type']}验证码,需要使用有头浏览器完成验证",
  8094. need_captcha=True,
  8095. captcha_type=ai_captcha['captcha_type'],
  8096. screenshot_base64=screenshot_base64,
  8097. page_url=current_url,
  8098. status='need_captcha'
  8099. )
  8100. self.report_progress(15, "正在选择视频文件...")
  8101. # 点击上传按钮 - 参考 matrix: page.get_by_role("button", name="上传视频")
  8102. upload_btn = self.page.get_by_role("button", name="上传视频")
  8103. async with self.page.expect_file_chooser(timeout=10000) as fc_info:
  8104. await upload_btn.click()
  8105. file_chooser = await fc_info.value
  8106. await file_chooser.set_files(params.video_path)
  8107. print(f"[{self.platform_name}] 视频文件已选择")
  8108. await asyncio.sleep(1)
  8109. # 关闭可能的弹窗 - 参考 matrix
  8110. known_btn = self.page.get_by_role("button", name="我知道了")
  8111. if await known_btn.count():
  8112. await known_btn.click()
  8113. print(f"[{self.platform_name}] 关闭弹窗")
  8114. self.report_progress(20, "正在填充标题...")
  8115. # 填写标题 - 参考 matrix
  8116. await asyncio.sleep(1)
  8117. title_input = self.page.get_by_placeholder('添加合适的话题和描述,作品能获得更多推荐~')
  8118. if await title_input.count():
  8119. await title_input.click()
  8120. await title_input.fill(params.title[:30])
  8121. print(f"[{self.platform_name}] 标题已填写")
  8122. self.report_progress(30, "等待视频上传完成...")
  8123. # 等待上传完成 - 参考 matrix: span:has-text("上传成功")
  8124. for i in range(120):
  8125. try:
  8126. count = await self.page.locator('span:has-text("上传成功")').count()
  8127. if count > 0:
  8128. print(f"[{self.platform_name}] 视频上传完毕")
  8129. break
  8130. else:
  8131. print(f"[{self.platform_name}] 正在上传视频中... {i+1}/120")
  8132. await asyncio.sleep(3)
  8133. except:
  8134. print(f"[{self.platform_name}] 正在上传视频中...")
  8135. await asyncio.sleep(3)
  8136. self.report_progress(50, "正在上传封面...")
  8137. # 上传封面 - 参考 matrix
  8138. await self.upload_cover(params.cover_path)
  8139. await asyncio.sleep(5)
  8140. self.report_progress(80, "正在发布...")
  8141. # 点击发布 - 参考 matrix
  8142. for i in range(30):
  8143. try:
  8144. publish_btn = self.page.get_by_role('button', name="发布", exact=True)
  8145. if await publish_btn.count():
  8146. print(f"[{self.platform_name}] 点击发布按钮...")
  8147. await publish_btn.click()
  8148. # 等待跳转到管理页面 - 参考 matrix: https://cp.kuaishou.com/article/manage/video?status=2&from=publish
  8149. await self.page.wait_for_url(
  8150. "https://cp.kuaishou.com/article/manage/video*",
  8151. timeout=1500
  8152. )
  8153. self.report_progress(100, "发布成功")
  8154. print(f"[{self.platform_name}] 视频发布成功!")
  8155. screenshot_base64 = await self.capture_screenshot()
  8156. return PublishResult(
  8157. success=True,
  8158. platform=self.platform_name,
  8159. message="发布成功",
  8160. screenshot_base64=screenshot_base64,
  8161. page_url=self.page.url,
  8162. status='success'
  8163. )
  8164. except Exception as e:
  8165. current_url = self.page.url
  8166. if "manage/video" in current_url:
  8167. self.report_progress(100, "发布成功")
  8168. print(f"[{self.platform_name}] 视频发布成功!")
  8169. screenshot_base64 = await self.capture_screenshot()
  8170. return PublishResult(
  8171. success=True,
  8172. platform=self.platform_name,
  8173. message="发布成功",
  8174. screenshot_base64=screenshot_base64,
  8175. page_url=current_url,
  8176. status='success'
  8177. )
  8178. else:
  8179. print(f"[{self.platform_name}] 视频正在发布中... {i+1}/30")
  8180. await asyncio.sleep(0.5)
  8181. # 发布超时
  8182. screenshot_base64 = await self.capture_screenshot()
  8183. page_url = await self.get_page_url()
  8184. return PublishResult(
  8185. success=False,
  8186. platform=self.platform_name,
  8187. error="发布超时,请检查发布状态",
  8188. screenshot_base64=screenshot_base64,
  8189. page_url=page_url,
  8190. status='need_action'
  8191. )
  8192. async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
  8193. """获取快手作品列表"""
  8194. print(f"\n{'='*60}")
  8195. print(f"[{self.platform_name}] 获取作品列表")
  8196. print(f"[{self.platform_name}] page={page}, page_size={page_size}")
  8197. print(f"{'='*60}")
  8198. works: List[WorkItem] = []
  8199. total = 0
  8200. has_more = False
  8201. try:
  8202. await self.init_browser()
  8203. cookie_list = self.parse_cookies(cookies)
  8204. await self.set_cookies(cookie_list)
  8205. if not self.page:
  8206. raise Exception("Page not initialized")
  8207. # 访问创作者中心
  8208. await self.page.goto("https://cp.kuaishou.com/")
  8209. await asyncio.sleep(3)
  8210. # 检查登录状态
  8211. current_url = self.page.url
  8212. if "passport" in current_url or "login" in current_url:
  8213. raise Exception("Cookie 已过期,请重新登录")
  8214. # 调用作品列表 API
  8215. pcursor = "" if page == 0 else str(page)
  8216. api_url = f"https://cp.kuaishou.com/rest/cp/works/v2/video/pc/photo/list?count={page_size}&pcursor={pcursor}&status=public"
  8217. js_code = f"""
  8218. async () => {{
  8219. const resp = await fetch("{api_url}", {{
  8220. credentials: 'include',
  8221. headers: {{ 'Accept': 'application/json' }}
  8222. }});
  8223. return await resp.json();
  8224. }}
  8225. """
  8226. response = await self.page.evaluate(js_code)
  8227. if response.get('result') == 1:
  8228. data = response.get('data', {})
  8229. photo_list = data.get('list', [])
  8230. has_more = len(photo_list) >= page_size
  8231. for photo in photo_list:
  8232. photo_id = photo.get('photoId', '')
  8233. if not photo_id:
  8234. continue
  8235. # 封面
  8236. cover_url = photo.get('coverUrl', '')
  8237. if cover_url.startswith('http://'):
  8238. cover_url = cover_url.replace('http://', 'https://')
  8239. # 时长
  8240. duration = photo.get('duration', 0) // 1000 # 毫秒转秒
  8241. # 发布时间
  8242. create_time = photo.get('timestamp', 0) // 1000
  8243. publish_time = ''
  8244. if create_time:
  8245. from datetime import datetime
  8246. publish_time = datetime.fromtimestamp(create_time).strftime('%Y-%m-%d %H:%M:%S')
  8247. works.append(WorkItem(
  8248. work_id=str(photo_id),
  8249. title=photo.get('caption', '') or '无标题',
  8250. cover_url=cover_url,
  8251. duration=duration,
  8252. status='published',
  8253. publish_time=publish_time,
  8254. play_count=photo.get('viewCount', 0),
  8255. like_count=photo.get('likeCount', 0),
  8256. comment_count=photo.get('commentCount', 0),
  8257. share_count=photo.get('shareCount', 0),
  8258. ))
  8259. print(f"[{self.platform_name}] 获取到 {len(works)} 个作品")
  8260. except Exception as e:
  8261. import traceback
  8262. traceback.print_exc()
  8263. return WorksResult(success=False, platform=self.platform_name, error=str(e))
  8264. return WorksResult(success=True, platform=self.platform_name, works=works, total=total or len(works), has_more=has_more)
  8265. async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
  8266. """获取快手作品评论"""
  8267. print(f"\n{'='*60}")
  8268. print(f"[{self.platform_name}] 获取作品评论")
  8269. print(f"[{self.platform_name}] work_id={work_id}")
  8270. print(f"{'='*60}")
  8271. comments: List[CommentItem] = []
  8272. total = 0
  8273. has_more = False
  8274. try:
  8275. await self.init_browser()
  8276. cookie_list = self.parse_cookies(cookies)
  8277. await self.set_cookies(cookie_list)
  8278. if not self.page:
  8279. raise Exception("Page not initialized")
  8280. await self.page.goto("https://cp.kuaishou.com/")
  8281. await asyncio.sleep(3)
  8282. current_url = self.page.url
  8283. if "passport" in current_url or "login" in current_url:
  8284. raise Exception("Cookie 已过期,请重新登录")
  8285. # 调用评论列表 API
  8286. pcursor = cursor or ""
  8287. api_url = f"https://cp.kuaishou.com/rest/cp/works/comment/list?photoId={work_id}&pcursor={pcursor}&count=20"
  8288. js_code = f"""
  8289. async () => {{
  8290. const resp = await fetch("{api_url}", {{
  8291. credentials: 'include',
  8292. headers: {{ 'Accept': 'application/json' }}
  8293. }});
  8294. return await resp.json();
  8295. }}
  8296. """
  8297. response = await self.page.evaluate(js_code)
  8298. if response.get('result') == 1:
  8299. data = response.get('data', {})
  8300. comment_list = data.get('list', [])
  8301. has_more = data.get('pcursor', '') != ''
  8302. for comment in comment_list:
  8303. cid = comment.get('commentId', '')
  8304. if not cid:
  8305. continue
  8306. author = comment.get('author', {})
  8307. # 解析子评论
  8308. replies = []
  8309. sub_list = comment.get('subComments', []) or []
  8310. for sub in sub_list:
  8311. sub_author = sub.get('author', {})
  8312. replies.append(CommentItem(
  8313. comment_id=str(sub.get('commentId', '')),
  8314. work_id=work_id,
  8315. content=sub.get('content', ''),
  8316. author_id=str(sub_author.get('id', '')),
  8317. author_name=sub_author.get('name', ''),
  8318. author_avatar=sub_author.get('headurl', ''),
  8319. like_count=sub.get('likeCount', 0),
  8320. create_time=str(sub.get('timestamp', '')),
  8321. ))
  8322. comments.append(CommentItem(
  8323. comment_id=str(cid),
  8324. work_id=work_id,
  8325. content=comment.get('content', ''),
  8326. author_id=str(author.get('id', '')),
  8327. author_name=author.get('name', ''),
  8328. author_avatar=author.get('headurl', ''),
  8329. like_count=comment.get('likeCount', 0),
  8330. reply_count=comment.get('subCommentCount', 0),
  8331. create_time=str(comment.get('timestamp', '')),
  8332. replies=replies,
  8333. ))
  8334. total = len(comments)
  8335. print(f"[{self.platform_name}] 获取到 {total} 条评论")
  8336. except Exception as e:
  8337. import traceback
  8338. traceback.print_exc()
  8339. return CommentsResult(success=False, platform=self.platform_name, work_id=work_id, error=str(e))
  8340. return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=comments, total=total, has_more=has_more)