# -*- coding: utf-8 -*-
# Source repository: hlm/multi-platform-media-manage
"""
微信视频号发布器
参考: matrix/tencent_uploader/main.py
"""

import asyncio
import os
from datetime import datetime
from typing import List
from .base import (
    BasePublisher, PublishParams, PublishResult,
    WorkItem, WorksResult, CommentItem, CommentsResult
)


def format_short_title(origin_title: str) -> str:
    """
    Build a short title from a full title.

    Alphanumeric characters (per ``str.isalnum``) and a small set of
    punctuation marks are kept, an ASCII comma becomes a space, and every
    other character is dropped. The result is then cut to at most 16
    characters, or right-padded with spaces up to 6 characters.

    Args:
        origin_title: The full title text.

    Returns:
        The filtered title, between 6 and 16 characters long.
    """
    # Book-title marks, curly double quotes, colon, plus, question mark,
    # percent and degree sign. The curly quotes are written as escapes so they
    # cannot be mistaken for (or converted to) ASCII quotes, which would end
    # the literal early and silently drop them from the allowed set.
    allowed_special_chars = "《》\u201c\u201d:+?%°"

    kept = []
    for char in origin_title:
        if char.isalnum() or char in allowed_special_chars:
            kept.append(char)
        elif char == ',':
            kept.append(' ')
    formatted = ''.join(kept)

    if len(formatted) > 16:
        return formatted[:16]
    # ljust is a no-op for strings that already have 6 or more characters.
    return formatted.ljust(6)


class WeixinPublisher(BasePublisher):
    """
    微信视频号发布器
    使用 Playwright 自动化操作视频号创作者中心
    注意: 需要使用 Chrome 浏览器，否则可能出现 H264 编码错误
    """
    
    platform_name = "weixin"
    login_url = "https://channels.weixin.qq.com/platform"
    publish_url = "https://channels.weixin.qq.com/platform/post/create"
    cookie_domain = ".weixin.qq.com"
    
    async def init_browser(self, storage_state: str = None):
        """
        Start Playwright and open a page in a Chrome-channel browser.

        When ``storage_state`` names an existing file, the browser context is
        created from it; otherwise a fresh context is used. The browser,
        context and page are stored on ``self`` and the page is returned.
        """
        from playwright.async_api import async_playwright

        pw = await async_playwright().start()
        # Launch the Chrome channel to avoid H264 encoding problems.
        self.browser = await pw.chromium.launch(
            channel="chrome",
            headless=self.headless,
        )

        # Only pass storage_state along when the file is actually there.
        context_options = {}
        if storage_state and os.path.exists(storage_state):
            context_options["storage_state"] = storage_state
        self.context = await self.browser.new_context(**context_options)

        self.page = await self.context.new_page()
        return self.page
    
    async def set_schedule_time(self, publish_date: datetime):
        """
        Fill in the scheduled-publish controls on the current page.

        Does nothing when no page is open. Only the hour of ``publish_date``
        is typed into the time field.
        """
        page = self.page
        if not page:
            return

        print(f"[{self.platform_name}] 设置定时发布...")

        # Switch the form to the scheduled option.
        await page.locator("label").filter(has_text="定时").nth(1).click()

        # Open the date picker.
        await page.click('input[placeholder="请选择发表时间"]')

        # Advance the picker once when its month label differs from the
        # zero-padded target month.
        target_month_label = f"{publish_date.month:02d}月"
        shown_month_label = await page.inner_text('span.weui-desktop-picker__panel__label:has-text("月")')
        if shown_month_label != target_month_label:
            await page.click('button.weui-desktop-btn__icon__right')

        # Click the first non-disabled cell whose text equals the target day.
        target_day = str(publish_date.day)
        for cell in await page.query_selector_all('table.weui-desktop-picker__table a'):
            cell_class = await cell.evaluate('el => el.className')
            if 'weui-desktop-picker__disabled' in cell_class:
                continue
            if (await cell.inner_text()).strip() == target_day:
                await cell.click()
                break

        # Select the existing time text and type the hour over it.
        await page.click('input[placeholder="请选择时间"]')
        await page.keyboard.press("Control+KeyA")
        await page.keyboard.type(str(publish_date.hour))

        # Click somewhere else (the text editor) to confirm the value.
        await page.locator("div.input-editor").click()
    
    async def handle_upload_error(self, video_path: str):
        """
        Delete the failed upload and hand the video file to the page again.

        Does nothing when no page is open.
        """
        page = self.page
        if not page:
            return

        print(f"[{self.platform_name}] 视频出错了，重新上传中...")

        # Click the delete tag in the media status area, then the delete button.
        delete_tag = page.locator('div.media-status-content div.tag-inner:has-text("删除")')
        await delete_tag.click()
        delete_button = page.get_by_role('button', name="删除", exact=True)
        await delete_button.click()

        # Set the same file on the file input again.
        await page.locator('input[type="file"]').set_input_files(video_path)
    
    async def add_title_tags(self, params: PublishParams):
        """
        Type the title and the hashtags into the text editor.

        Does nothing when no page is open. The title is typed first; when
        there are tags, Enter is pressed and each tag is typed as ``#tag``
        followed by a space.

        Args:
            params: Publish parameters; ``title`` and ``tags`` are read.
        """
        if not self.page:
            return

        await self.page.locator("div.input-editor").click()
        await self.page.keyboard.type(params.title)

        # Treat a missing tag list like an empty one so the summary line below
        # cannot fail on len(None).
        tags = params.tags or []
        if tags:
            await self.page.keyboard.press("Enter")
            for tag in tags:
                await self.page.keyboard.type("#" + tag)
                await self.page.keyboard.press("Space")

        print(f"[{self.platform_name}] 成功添加标题和 {len(tags)} 个话题")
    
    async def add_short_title(self):
        """
        Probe for the short-title input on the current page.

        This is a placeholder: the input is located and counted, but nothing
        is typed into it. Does nothing when no page is open. Lookup errors are
        reported and otherwise ignored.
        """
        if not self.page:
            return

        try:
            short_title_input = (
                self.page.get_by_text("短标题", exact=True)
                .locator("..")
                .locator("xpath=following-sibling::div")
                .locator('span input[type="text"]')
            )
            # Only the presence check runs here; no text is entered.
            await short_title_input.count()
        except Exception as e:
            # Best effort. `except Exception` (unlike a bare `except`) lets
            # task cancellation and KeyboardInterrupt propagate.
            print(f"[{self.platform_name}] 短标题输入框检查失败: {e}")
    
    async def upload_cover(self, cover_path: str):
        """
        Replace the cover image through the "change cover" dialog.

        Does nothing when no page is open, when ``cover_path`` is empty, or
        when the file does not exist. Any error is printed, not raised.
        """
        if not (self.page and cover_path and os.path.exists(cover_path)):
            return

        page = self.page
        try:
            await asyncio.sleep(2)
            change_cover_btn = page.locator('div.finder-tag-wrap.btn:has-text("更换封面")')
            btn_class = await change_cover_btn.get_attribute('class')
            if "disabled" in btn_class:
                # The button is disabled; leave the cover as it is.
                return

            await change_cover_btn.click()
            cover_slot = page.locator('div.single-cover-uploader-wrap > div.wrap')
            await cover_slot.hover()

            # Remove the current cover when a delete icon is present.
            remove_icon = page.locator(".del-wrap > .svg-icon")
            if await remove_icon.count():
                await remove_icon.click()

            # Clicking the slot opens a file chooser; give it the new cover.
            async with page.expect_file_chooser() as chooser_info:
                await cover_slot.click()
            chooser = await chooser_info.value
            await chooser.set_files(cover_path)

            # Two confirmation buttons are clicked in turn.
            await asyncio.sleep(2)
            await page.get_by_role("button", name="确定").click()
            await asyncio.sleep(1)
            await page.get_by_role("button", name="确认").click()

            print(f"[{self.platform_name}] 封面上传成功")
        except Exception as e:
            print(f"[{self.platform_name}] 封面上传失败: {e}")
    
    async def check_captcha(self) -> dict:
        """
        Report whether the current page shows a captcha or a login prompt.

        Captcha selectors are checked first, then login selectors; the first
        selector that matches at least one element decides the result.

        Returns:
            A dict with ``need_captcha`` (bool) and ``captcha_type``:
            ``'image'`` for a captcha match, ``'login'`` for a login match,
            ``''`` when nothing matched or no page is open.
        """
        if not self.page:
            return {'need_captcha': False, 'captcha_type': ''}

        # (captcha_type, log label, selectors), checked in this order.
        checks = (
            ('image', "检测到验证码", (
                'text="请输入验证码"',
                'text="滑动验证"',
                '[class*="captcha"]',
                '[class*="verify"]',
            )),
            ('login', "检测到需要登录", (
                'text="请登录"',
                'text="扫码登录"',
                '[class*="login-dialog"]',
            )),
        )

        for captcha_type, label, selectors in checks:
            for selector in selectors:
                try:
                    found = await self.page.locator(selector).count() > 0
                except Exception as e:
                    # A failing selector must not stop the remaining checks.
                    # `except Exception` still lets task cancellation through,
                    # which a bare `except` would swallow.
                    print(f"[{self.platform_name}] 验证码检测异常: {e}")
                    continue
                if found:
                    print(f"[{self.platform_name}] {label}: {selector}")
                    return {'need_captcha': True, 'captcha_type': captcha_type}

        return {'need_captcha': False, 'captcha_type': ''}

    async def publish(self, cookies: str, params: PublishParams) -> PublishResult:
        """
        Publish a video through the WeChat Channels create-post page.

        Steps: start the browser, apply the cookies, open the create-post
        page, stop early when a login or captcha is required, attach the video
        file, type title and tags, wait for the upload, fill the short title,
        optionally set a schedule, then click publish.

        Args:
            cookies: Cookie data handed to ``self.parse_cookies``.
            params: Publish parameters; ``video_path``, ``title``, ``tags``,
                ``cover_path`` and ``publish_date`` are read.

        Returns:
            A ``PublishResult``. It is unsuccessful when a login or captcha is
            required, or when no upload entry could be used.

        Raises:
            Exception: When the page is missing, the video file does not
                exist, or the post list page is not reached in time.
        """
        print(f"\n{'='*60}")
        print(f"[{self.platform_name}] 开始发布视频")
        print(f"[{self.platform_name}] 视频路径: {params.video_path}")
        print(f"[{self.platform_name}] 标题: {params.title}")
        print(f"[{self.platform_name}] Headless: {self.headless}")
        print(f"{'='*60}")

        self.report_progress(5, "正在初始化浏览器...")

        # Start the browser (Chrome channel).
        await self.init_browser()
        print(f"[{self.platform_name}] 浏览器初始化完成")

        # Parse the cookies and apply them.
        cookie_list = self.parse_cookies(cookies)
        print(f"[{self.platform_name}] 解析到 {len(cookie_list)} 个 cookies")
        await self.set_cookies(cookie_list)

        if not self.page:
            raise Exception("Page not initialized")

        # Make sure the video file exists before touching the page.
        if not os.path.exists(params.video_path):
            raise Exception(f"视频文件不存在: {params.video_path}")

        print(f"[{self.platform_name}] 视频文件存在，大小: {os.path.getsize(params.video_path)} bytes")

        self.report_progress(10, "正在打开上传页面...")

        # Open the create-post page.
        await self.page.goto(self.publish_url, wait_until="domcontentloaded", timeout=60000)
        await asyncio.sleep(3)

        # A URL containing "login" is treated as a redirect to the login page.
        current_url = self.page.url
        print(f"[{self.platform_name}] 当前页面: {current_url}")

        if "login" in current_url:
            return await self._captcha_required_result(
                "Cookie 已过期，需要重新登录", 'login', current_url
            )

        # Check for a captcha or login prompt on the page itself.
        captcha_result = await self.check_captcha()
        if captcha_result['need_captcha']:
            return await self._captcha_required_result(
                f"需要{captcha_result['captcha_type']}验证码",
                captcha_result['captcha_type'],
                current_url,
            )

        self.report_progress(15, "正在选择视频文件...")

        if not await self._attach_video_file(params.video_path):
            screenshot_base64 = await self.capture_screenshot()
            return PublishResult(
                success=False,
                platform=self.platform_name,
                error="未找到上传入口",
                screenshot_base64=screenshot_base64,
                page_url=await self.get_page_url(),
                status='failed'
            )

        self.report_progress(20, "正在填充标题和话题...")
        await self.add_title_tags(params)

        self.report_progress(30, "等待视频上传完成...")
        await self._wait_for_upload(params)

        self.report_progress(60, "处理视频设置...")

        # Fill the short title when the input is present (best effort).
        try:
            short_title_el = self.page.get_by_text("短标题", exact=True).locator("..").locator(
                "xpath=following-sibling::div").locator('span input[type="text"]')
            if await short_title_el.count():
                await short_title_el.fill(format_short_title(params.title))
        except Exception as e:
            print(f"[{self.platform_name}] 短标题填写失败: {e}")

        # Scheduled publishing.
        if params.publish_date:
            self.report_progress(70, "设置定时发布...")
            await self.set_schedule_time(params.publish_date)

        self.report_progress(80, "正在发布...")

        if await self._submit_post():
            self.report_progress(100, "发布成功")
            return PublishResult(
                success=True,
                platform=self.platform_name,
                message="发布成功"
            )

        raise Exception("发布超时")

    async def _captcha_required_result(self, error: str, captcha_type: str, page_url: str) -> PublishResult:
        """Build the failed result returned when a login or captcha blocks publishing."""
        screenshot_base64 = await self.capture_screenshot()
        return PublishResult(
            success=False,
            platform=self.platform_name,
            error=error,
            need_captcha=True,
            captcha_type=captcha_type,
            screenshot_base64=screenshot_base64,
            page_url=page_url,
            status='need_captcha'
        )

    async def _attach_video_file(self, video_path: str) -> bool:
        """Hand the video file to the page via a file input or a file chooser; return True on success."""
        # Method 1: set the file directly on an <input type="file">.
        try:
            file_inputs = await self.page.query_selector_all('input[type="file"]')
            print(f"[{self.platform_name}] 找到 {len(file_inputs)} 个文件输入")

            for file_input in file_inputs:
                try:
                    await file_input.set_input_files(video_path)
                except Exception as e:
                    print(f"[{self.platform_name}] file input 上传失败: {e}")
                else:
                    print(f"[{self.platform_name}] 通过 file input 上传成功")
                    return True
        except Exception as e:
            print(f"[{self.platform_name}] 查找 file input 失败: {e}")

        # Method 2: click a candidate upload area and answer the file chooser.
        upload_selectors = [
            'div.upload-content',
            'div[class*="upload"]',
            'div[class*="drag-upload"]',
            'div.add-wrap',
            'div:has-text("上传视频")',
            '[class*="uploader"]',
        ]
        for selector in upload_selectors:
            try:
                upload_area = self.page.locator(selector).first
                if await upload_area.count() > 0:
                    print(f"[{self.platform_name}] 尝试点击上传区域: {selector}")
                    async with self.page.expect_file_chooser(timeout=10000) as fc_info:
                        await upload_area.click()
                    file_chooser = await fc_info.value
                    await file_chooser.set_files(video_path)
                    print(f"[{self.platform_name}] 通过点击上传区域成功")
                    return True
            except Exception as e:
                print(f"[{self.platform_name}] 选择器 {selector} 失败: {e}")

        return False

    async def _wait_for_upload(self, params: PublishParams) -> None:
        """Poll until the publish button is no longer disabled, then upload the cover."""
        # Up to 120 polls, 3 seconds apart. When the button never becomes
        # enabled, the method simply returns and the caller carries on.
        for _ in range(120):
            try:
                button_info = await self.page.get_by_role("button", name="发表").get_attribute('class')
                if "weui-desktop-btn_disabled" not in button_info:
                    print(f"[{self.platform_name}] 视频上传完毕")

                    self.report_progress(50, "正在上传封面...")
                    await self.upload_cover(params.cover_path)
                    return

                # Still disabled: re-upload when the page reports an upload
                # error and offers a delete tag.
                if await self.page.locator('div.status-msg.error').count():
                    if await self.page.locator('div.media-status-content div.tag-inner:has-text("删除")').count():
                        await self.handle_upload_error(params.video_path)
            except Exception as e:
                # Keep polling on page errors. `except Exception` keeps the
                # loop cancellable, unlike a bare `except`.
                print(f"[{self.platform_name}] 等待上传时出错，稍后重试: {e}")

            await asyncio.sleep(3)

    async def _submit_post(self) -> bool:
        """Click the publish button until the post list page is reached; return False after 30 attempts."""
        for _ in range(30):
            try:
                publish_btn = self.page.locator('div.form-btns button:has-text("发表")')
                if await publish_btn.count():
                    await publish_btn.click()
                await self.page.wait_for_url(
                    "https://channels.weixin.qq.com/platform/post/list",
                    timeout=10000
                )
                return True
            except Exception:
                # The wait may time out even though navigation happened;
                # check the URL before retrying.
                if "post/list" in self.page.url:
                    return True
                await asyncio.sleep(1)

        return False

    async def get_works(self, cookies: str, page: int = 0, page_size: int = 20) -> WorksResult:
        """
        Scrape the post list page and return up to ``page_size`` works.

        NOTE(review): ``page`` is only logged; the first ``page_size`` items
        found on the page are always returned. Confirm whether callers expect
        real pagination.

        Args:
            cookies: Cookie data handed to ``self.parse_cookies``.
            page: Requested page index (currently not used for selection).
            page_size: Maximum number of works to return.

        Returns:
            A ``WorksResult``. On any error outside the per-item parsing it is
            unsuccessful and carries the error text; items that fail to parse
            are skipped.
        """
        import hashlib
        import re
        import traceback

        print(f"\n{'='*60}")
        print(f"[{self.platform_name}] 获取作品列表")
        print(f"[{self.platform_name}] page={page}, page_size={page_size}")
        print(f"{'='*60}")

        works: List[WorkItem] = []
        total = 0
        has_more = False

        # Compiled once instead of on every item. The stats text may look
        # like "播放 100 点赞 50 评论 10".
        play_re = re.compile(r'播放[\s]*([\d.]+[万]?)')
        like_re = re.compile(r'点赞[\s]*([\d.]+[万]?)')
        comment_re = re.compile(r'评论[\s]*([\d.]+[万]?)')

        def parse_count(match) -> int:
            """Turn a regex match such as "1.5万" or "100" into an int (0 when absent or malformed)."""
            if not match:
                return 0
            val = match.group(1)
            try:
                if '万' in val:
                    return int(float(val.replace('万', '')) * 10000)
                # Go through float so a decimal such as "1.5" does not raise.
                return int(float(val))
            except ValueError:
                # The pattern also matches strings like "..": count them as 0
                # rather than dropping the whole work item.
                return 0

        try:
            await self.init_browser()
            cookie_list = self.parse_cookies(cookies)
            await self.set_cookies(cookie_list)

            if not self.page:
                raise Exception("Page not initialized")

            # Open the post list page.
            await self.page.goto("https://channels.weixin.qq.com/platform/post/list")
            await asyncio.sleep(5)

            # A URL containing "login" is treated as an expired session.
            current_url = self.page.url
            if "login" in current_url:
                raise Exception("Cookie 已过期，请重新登录")

            # Works are read from the rendered page; wait for the list wrapper.
            await self.page.wait_for_selector('div.post-feed-wrap', timeout=10000)

            post_items = self.page.locator('div.post-feed-item')
            item_count = await post_items.count()

            print(f"[{self.platform_name}] 找到 {item_count} 个作品项")

            for i in range(min(item_count, page_size)):
                try:
                    item = post_items.nth(i)

                    # Cover image URL.
                    cover_el = item.locator('div.cover-wrap img').first
                    cover_url = ''
                    if await cover_el.count() > 0:
                        cover_url = await cover_el.get_attribute('src') or ''

                    # Title, trimmed and limited to 50 characters.
                    title_el = item.locator('div.content').first
                    title = ''
                    if await title_el.count() > 0:
                        title = await title_el.text_content() or ''
                        title = title.strip()[:50]

                    # Play / like / comment counts.
                    stats_el = item.locator('div.post-data')
                    play_count = 0
                    like_count = 0
                    comment_count = 0
                    if await stats_el.count() > 0:
                        stats_text = await stats_el.text_content() or ''
                        play_count = parse_count(play_re.search(stats_text))
                        like_count = parse_count(like_re.search(stats_text))
                        comment_count = parse_count(comment_re.search(stats_text))

                    # Publish time text.
                    time_el = item.locator('div.time')
                    publish_time = ''
                    if await time_el.count() > 0:
                        publish_time = await time_el.text_content() or ''
                        publish_time = publish_time.strip()

                    # Temporary id built from the list position and the title.
                    # A content digest is used because the built-in hash() of
                    # a str is salted per process, so it would give a
                    # different id after every restart.
                    title_digest = hashlib.sha1(title.encode('utf-8')).hexdigest()[:16]
                    work_id = f"weixin_{i}_{title_digest}"

                    works.append(WorkItem(
                        work_id=work_id,
                        title=title or '无标题',
                        cover_url=cover_url,
                        duration=0,
                        status='published',
                        publish_time=publish_time,
                        play_count=play_count,
                        like_count=like_count,
                        comment_count=comment_count,
                    ))
                except Exception as e:
                    print(f"[{self.platform_name}] 解析作品 {i} 失败: {e}")
                    continue

            total = len(works)
            has_more = item_count > page_size
            print(f"[{self.platform_name}] 获取到 {total} 个作品")

        except Exception as e:
            traceback.print_exc()
            return WorksResult(success=False, platform=self.platform_name, error=str(e))

        return WorksResult(success=True, platform=self.platform_name, works=works, total=total, has_more=has_more)
    
    async def get_comments(self, cookies: str, work_id: str, cursor: str = "") -> CommentsResult:
        """
        Scrape the comment management page and return the comments found.

        NOTE(review): ``cursor`` is not used, and ``work_id`` is only copied
        into the returned items; the page is not filtered by work here.
        Confirm against callers.

        Args:
            cookies: Cookie data handed to ``self.parse_cookies``.
            work_id: Id stored on the result and on every comment item.
            cursor: Accepted for interface compatibility; not used.

        Returns:
            A ``CommentsResult``. It is successful and empty when no comment
            list appears within 10 seconds, and unsuccessful with the error
            text on any other failure. Comments that fail to parse are
            skipped.
        """
        import hashlib
        import traceback

        print(f"\n{'='*60}")
        print(f"[{self.platform_name}] 获取作品评论")
        print(f"[{self.platform_name}] work_id={work_id}")
        print(f"{'='*60}")

        comments: List[CommentItem] = []
        total = 0
        has_more = False

        async def text_of(locator) -> str:
            """Return the stripped text of a locator, or '' when it matches nothing."""
            if await locator.count() > 0:
                return (await locator.text_content() or '').strip()
            return ''

        try:
            await self.init_browser()
            cookie_list = self.parse_cookies(cookies)
            await self.set_cookies(cookie_list)

            if not self.page:
                raise Exception("Page not initialized")

            # Open the comment management page.
            await self.page.goto("https://channels.weixin.qq.com/platform/comment/index")
            await asyncio.sleep(5)

            # A URL containing "login" is treated as an expired session.
            current_url = self.page.url
            if "login" in current_url:
                raise Exception("Cookie 已过期，请重新登录")

            # Wait for the comment list; a missing list means "no comments".
            # `except Exception` keeps task cancellation from being mistaken
            # for an empty list, which a bare `except` would do.
            try:
                await self.page.wait_for_selector('div.comment-list', timeout=10000)
            except Exception:
                print(f"[{self.platform_name}] 未找到评论列表")
                return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=[], total=0, has_more=False)

            comment_items = self.page.locator('div.comment-item')
            item_count = await comment_items.count()

            print(f"[{self.platform_name}] 找到 {item_count} 个评论项")

            for i in range(item_count):
                try:
                    item = comment_items.nth(i)

                    # Author name and avatar.
                    author_name = await text_of(item.locator('div.nick-name'))
                    author_avatar = ''
                    avatar_el = item.locator('img.avatar')
                    if await avatar_el.count() > 0:
                        author_avatar = await avatar_el.get_attribute('src') or ''

                    # Comment text and time text.
                    content = await text_of(item.locator('div.comment-content'))
                    create_time = await text_of(item.locator('div.time'))

                    # Id built from the list position and the comment text. A
                    # content digest is used because the built-in hash() of a
                    # str is salted per process, so it would give a different
                    # id after every restart.
                    content_digest = hashlib.sha1(content.encode('utf-8')).hexdigest()[:16]
                    comment_id = f"weixin_comment_{i}_{content_digest}"

                    comments.append(CommentItem(
                        comment_id=comment_id,
                        work_id=work_id,
                        content=content,
                        author_id='',
                        author_name=author_name,
                        author_avatar=author_avatar,
                        like_count=0,
                        reply_count=0,
                        create_time=create_time,
                    ))
                except Exception as e:
                    print(f"[{self.platform_name}] 解析评论 {i} 失败: {e}")
                    continue

            total = len(comments)
            print(f"[{self.platform_name}] 获取到 {total} 条评论")

        except Exception as e:
            traceback.print_exc()
            return CommentsResult(success=False, platform=self.platform_name, work_id=work_id, error=str(e))

        return CommentsResult(success=True, platform=self.platform_name, work_id=work_id, comments=comments, total=total, has_more=has_more)