Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| from PIL import Image | |
| from transformers import AutoModel, AutoTokenizer | |
| import warnings | |
| import os | |
| import spaces | |
| import markdown | |
| import re | |
# Silence every warning category for the whole process.
warnings.filterwarnings(action="ignore")

# Module-level cache for the loaded model and tokenizer; both stay None
# until load_model() fills them in.
model = tokenizer = None
def load_model():
    """Load the MiniCPM-o model and tokenizer once and cache them in module globals.

    The first call downloads/instantiates ``openbmb/MiniCPM-o-2_6`` with only
    the vision tower enabled (audio and TTS heads are disabled). Later calls
    return the cached objects.

    Returns:
        tuple: ``(model, tokenizer)``.
    """
    global model, tokenizer
    # Reload when either half is missing so a failed tokenizer load can never
    # leave a cached ``(model, None)`` pair behind.
    if model is None or tokenizer is None:
        print("正在加载MiniCPM-o模型...")
        use_cuda = torch.cuda.is_available()
        device = "cuda" if use_cuda else "cpu"
        device_map = "auto" if use_cuda else None
        loaded_model = AutoModel.from_pretrained(
            'openbmb/MiniCPM-o-2_6',
            trust_remote_code=True,
            torch_dtype=torch.bfloat16 if use_cuda else torch.float32,
            device_map=device_map,
            init_vision=True,
            init_audio=False,
            init_tts=False,
        )
        loaded_model = loaded_model.eval()
        # With device_map="auto" the weights are already placed by the loader;
        # calling .to() on a dispatched model is redundant and can fail when
        # layers are spread over several devices. Only move it ourselves when
        # no device map was requested.
        if device_map is None:
            loaded_model = loaded_model.to(device)
        loaded_tokenizer = AutoTokenizer.from_pretrained(
            'openbmb/MiniCPM-o-2_6', trust_remote_code=True
        )
        # Publish both objects together, only after both loads succeeded.
        model, tokenizer = loaded_model, loaded_tokenizer
        print("模型加载完成")
    return model, tokenizer
def clean_markdown_output(text):
    """Extract the first Markdown table from model output.

    A table row is any line containing ``|`` that is not a code-fence line.
    Collection starts at the first such row and stops at the first line that
    is not a row (blank line, closing fence, or trailing explanation), so
    commentary after the table is never returned as part of it.

    If no table row is found, the text is returned with code-fence markers
    removed and with blank lines and lines starting with common explanatory
    phrases dropped.

    Args:
        text: Raw text generated by the model.

    Returns:
        str: The table rows (stripped, newline-joined) or the cleaned text.
    """
    table_rows = []
    for raw_line in text.strip().split('\n'):
        line = raw_line.strip()
        if '|' in line and not line.startswith('```'):
            table_rows.append(line)
        elif table_rows:
            # First non-row line after the table began: the table is over.
            break

    if table_rows:
        return '\n'.join(table_rows)

    # No table found: strip fences and explanatory sentences instead.
    unfenced = text.replace('```markdown', '').replace('```', '').strip()
    explanation_prefixes = ('这个表格', '该表格', '表格显示')
    kept_lines = []
    for raw_line in unfenced.split('\n'):
        line = raw_line.strip()
        if line and not line.startswith(explanation_prefixes):
            kept_lines.append(line)
    return '\n'.join(kept_lines)
def clean_formula_output(text):
    """Reduce model output to the lines that look like formulas.

    A stripped, non-empty line is kept when it does not start with one of the
    known explanatory phrases and contains at least one digit or one of the
    characters ``$ \\ + - * / = ( ) [ ] { } ^ _``.

    If no line qualifies, the text is returned with code-fence markers removed
    and with blank lines and explanatory lines dropped. The fallback uses a
    slightly shorter phrase list: it does not filter '识别结果'.
    """
    primary_noise = ('这个公式', '该公式', '公式表示', '根据图片', '图片中的', '识别结果')
    fallback_noise = ('这个公式', '该公式', '公式表示', '根据图片', '图片中的')
    math_chars = frozenset('$\\+-*/=()[]{}^_')

    def looks_like_math(candidate):
        return any(ch.isdigit() or ch in math_chars for ch in candidate)

    stripped_lines = (raw.strip() for raw in text.strip().split('\n'))
    formula_lines = [
        candidate
        for candidate in stripped_lines
        if candidate
        and not candidate.startswith(primary_noise)
        and looks_like_math(candidate)
    ]
    if formula_lines:
        return '\n'.join(formula_lines)

    # Nothing formula-like was found: fall back to the de-fenced text.
    unfenced = text.replace('```latex', '').replace('```', '').strip()
    remaining = (raw.strip() for raw in unfenced.split('\n'))
    return '\n'.join(
        candidate
        for candidate in remaining
        if candidate and not candidate.startswith(fallback_noise)
    )
def clean_text_output(text):
    """Reduce model output to the recognized text itself.

    Processing steps:
      1. Remove code-fence markers.
      2. Drop blank lines and lines that start with narration such as
         "识别结果" or "根据图片".
      3. For lines shaped like ``<label>:<content>`` where the label is at
         most 10 characters and contains a known label keyword (标题, 正文,
         ...), keep only ``<content>``. Label-only lines are dropped.

    Label prefixes such as "标题:" are deliberately NOT in the narration list:
    filtering them there would discard the whole line, including the text
    that follows the label.

    Args:
        text: Raw text generated by the model.

    Returns:
        str: Cleaned lines joined by newlines.
    """
    cleaned_text = text.replace('```text', '').replace('```', '').strip()

    narration_prefixes = (
        '图片中的文字', '识别结果', '文字内容', '根据图片', '这张图片', '该图片',
    )
    label_keywords = (
        '标题', '正文', '内容', '文本', '题目', '段落', '文字', '主题', '副标题',
    )

    text_lines = []
    for raw_line in cleaned_text.split('\n'):
        line = raw_line.strip()
        if not line or line.startswith(narration_prefixes):
            continue

        # Only the full-width colon is treated as a label separator.
        label, colon, remainder = line.partition(':')
        is_label = (
            bool(colon)
            and len(label) <= 10  # labels are short
            and any(keyword in label for keyword in label_keywords)
        )
        if not is_label:
            text_lines.append(line)
            continue

        remainder = remainder.strip()
        if remainder:
            text_lines.append(remainder)
    return '\n'.join(text_lines)
def detect_content_types(text):
    """Classify which kinds of content appear in ``text``.

    Returns a set that always contains ``'text'`` and additionally:
      * ``'table'``   if any line holds two or more ``|`` characters;
      * ``'formula'`` if the text contains ``$`` or one of a fixed list of
        LaTeX commands, or contains any of ``{ } ^ _`` together with at
        least one digit.
    """
    latex_markers = (
        '$', '\\frac', '\\sum', '\\int', '\\sqrt', '\\alpha', '\\beta', '\\gamma', '\\delta',
        '\\theta', '\\pi', '\\sigma', '\\omega', '\\infty', '\\partial', '\\nabla',
    )
    script_symbols = ('{', '}', '^', '_')

    has_table = any(row.count('|') >= 2 for row in text.split('\n'))

    has_latex_marker = any(marker in text for marker in latex_markers)
    has_script_symbol = any(symbol in text for symbol in script_symbols)
    has_digit = any(ch.isdigit() for ch in text)
    has_formula = has_latex_marker or (has_script_symbol and has_digit)

    found = {'text'}
    if has_table:
        found.add('table')
    if has_formula:
        found.add('formula')
    return found
def _render_text_block(block_lines, padding="10px 0"):
    """Render plain-text lines as one HTML-escaped ``<div>``; return '' if blank."""
    import html

    text_content = '\n'.join(block_lines)
    if not text_content.strip():
        return ''
    # The text comes from OCR of a user-supplied image, so it must be escaped
    # before being placed into markup ("a < b" would otherwise break the page).
    return (
        f"<div style='padding: {padding}; white-space: pre-wrap; "
        f"font-family: Arial, sans-serif; line-height: 1.6;'>"
        f"{html.escape(text_content, quote=False)}</div>"
    )


def render_mixed_content(text):
    """Render text that may interleave plain text, Markdown tables and formulas.

    The text is scanned line by line:
      * a line with two or more ``|`` starts a table; all following lines that
        contain ``|`` belong to it and are passed to ``render_markdown_table``;
      * a line containing ``$`` or one of ``\\frac``, ``\\sum``, ``\\int``,
        ``\\sqrt`` starts a formula; following lines that contain a digit or a
        math character are added until a table row or a non-math line is
        reached, and the group is passed to ``render_latex_formula``;
      * every other line is accumulated and emitted as an escaped text block.

    Args:
        text: Cleaned recognition result.

    Returns:
        str: HTML for display. Blank input is returned unchanged.
    """
    if not text.strip():
        return text

    # Pure text needs no segmentation.
    if detect_content_types(text) == {'text'}:
        return _render_text_block([text], padding="15px")

    formula_starters = ('$', '\\frac', '\\sum', '\\int', '\\sqrt')
    math_chars = frozenset('$\\+-*/=()[]{}^_')

    lines = text.split('\n')
    rendered_parts = []
    pending_text = []

    def flush_pending_text():
        rendered = _render_text_block(pending_text)
        if rendered:
            rendered_parts.append(rendered)
        pending_text.clear()

    i = 0
    while i < len(lines):
        line = lines[i].strip()

        if line.count('|') >= 2:
            flush_pending_text()
            table_lines = []
            while i < len(lines) and '|' in lines[i]:
                table_lines.append(lines[i])
                i += 1
            rendered_parts.append(render_markdown_table('\n'.join(table_lines)))
            continue

        if any(marker in line for marker in formula_starters):
            flush_pending_text()
            formula_lines = [line]
            i += 1
            while i < len(lines):
                next_line = lines[i].strip()
                # A table row directly after a formula must not be swallowed
                # just because its cells contain digits.
                if next_line.count('|') >= 2:
                    break
                if not any(ch.isdigit() or ch in math_chars for ch in next_line):
                    break
                formula_lines.append(next_line)
                i += 1
            rendered_parts.append(render_latex_formula('\n'.join(formula_lines)))
            continue

        pending_text.append(lines[i])
        i += 1

    flush_pending_text()

    if rendered_parts:
        return '<div style="padding: 5px;">' + ''.join(rendered_parts) + '</div>'
    return _render_text_block([text], padding="15px")
def render_markdown_table(markdown_text):
    """Convert a Markdown table to styled HTML.

    The Markdown is converted with the ``markdown`` package's ``tables``
    extension and wrapped in a horizontally scrollable ``<div>``. The
    accompanying CSS is scoped to that wrapper's class so it cannot restyle
    other tables on the page.

    NOTE(review): raw HTML inside ``markdown_text`` is passed to the Markdown
    converter unchanged; sanitize upstream if the input may contain markup.

    Args:
        markdown_text: Markdown source containing the table.

    Returns:
        str: HTML snippet. Blank input is returned unchanged.
    """
    if not markdown_text.strip():
        return markdown_text

    html_content = markdown.markdown(markdown_text, extensions=['tables'])

    styled_html = f"""
<div class="md-table-wrap" style="overflow-x: auto; margin: 10px 0;">
<style>
.md-table-wrap table {{
    border-collapse: collapse;
    width: 100%;
    margin: 10px 0;
    font-family: Arial, sans-serif;
}}
.md-table-wrap th, .md-table-wrap td {{
    border: 1px solid #ddd;
    padding: 8px 12px;
    text-align: left;
}}
.md-table-wrap th {{
    background-color: #f2f2f2;
    font-weight: bold;
}}
.md-table-wrap tr:nth-child(even) {{
    background-color: #f9f9f9;
}}
.md-table-wrap tr:hover {{
    background-color: #f5f5f5;
}}
</style>
{html_content}
</div>
"""
    return styled_html
def render_latex_formula(latex_text):
    """Wrap LaTeX source in HTML that MathJax can typeset.

    Each non-blank line is handled as follows:
      * lines inside a multi-line display block opened by a lone ``$$`` or
        ``\\[`` are passed through untouched until the matching closer;
      * lines that already contain math delimiters (``$``, ``\\(``, ``\\)``,
        ``\\[``, ``\\]``) are passed through untouched, so they are never
        wrapped a second time;
      * other lines containing ``=`` or a LaTeX symbol become display math
        (``$$...$$``); anything else becomes inline math (``$...$``).

    Lines are HTML-escaped (``<``, ``>``, ``&``) and joined with ``<br>``.

    Args:
        latex_text: LaTeX source, one formula per line or a multi-line block.

    Returns:
        str: HTML snippet. Blank input is returned unchanged.
    """
    import html

    if not latex_text.strip():
        return latex_text

    delimiter_tokens = ('$', '\\(', '\\)', '\\[', '\\]')
    latex_symbols = ('\\', '{', '}', '^', '_')

    processed_lines = []
    pending_closer = None  # closer of the currently open display block, if any
    for raw_line in latex_text.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if pending_closer is not None:
            if pending_closer in line:
                pending_closer = None
        elif line.startswith('$$') and line.count('$$') == 1:
            pending_closer = '$$'
        elif line.startswith('\\[') and '\\]' not in line:
            pending_closer = '\\]'
        elif not any(token in line for token in delimiter_tokens):
            if '=' in line or any(symbol in line for symbol in latex_symbols):
                line = f"$${line}$$"
            else:
                line = f"${line}$"

        # Escape for HTML; the browser turns the entities back into the
        # original characters in the text MathJax reads.
        processed_lines.append(html.escape(line, quote=False))

    formula_html = '<br>'.join(processed_lines)

    html_content = f"""
<div style="margin: 10px 0; padding: 15px; background-color: #f8f9fa; border-left: 4px solid #007bff; border-radius: 4px;">
<div style="font-family: 'Times New Roman', serif; font-size: 16px; line-height: 1.6;">
{formula_html}
</div>
</div>
<script>
if (typeof MathJax !== 'undefined') {{
    MathJax.typesetPromise();
}}
</script>
"""
    return html_content
def parse_image(image, parse_type):
    """Run the model on an image and return the result in the requested format.

    Args:
        image: A PIL image, a path to an image file, or ``None``.
        parse_type: One of "表格解析", "公式解析", "文本解析". Any other value
            uses the table prompt and returns the raw model output.

    Returns:
        tuple: ``(rendered_html, raw_text, status_message)``. All three values
        are returned on every path, including the no-image and error paths,
        because the UI binds this function to three outputs.

    NOTE(review): this file imports ``spaces`` but applies no ``@spaces.GPU``
    decorator here; if the app runs on ZeroGPU hardware, confirm whether the
    decorator is required.
    """
    import html

    # Validate input first so a missing image never triggers a model load.
    if image is None:
        return "请上传一张图片", "", "等待上传图片..."

    try:
        model, tokenizer = load_model()

        if isinstance(image, str):
            # convert() returns an independent copy, so the file can be closed.
            with Image.open(image) as opened:
                image = opened.convert('RGB')
        elif hasattr(image, 'convert'):
            image = image.convert('RGB')

        questions = {
            "表格解析": "解析一下这个表格为markdown格式,不需要任何解释和思考,直接输出markdown格式",
            "公式解析": "识别并提取图片中的数学公式,用LaTeX格式输出,不需要任何解释,直接输出公式",
            "文本解析": "识别并提取图片中的所有文字内容,保持原有格式,不需要任何解释,直接输出文字内容",
        }
        question = questions.get(parse_type, questions["表格解析"])
        msgs = [{'role': 'user', 'content': [image, question]}]

        res = model.chat(
            msgs=msgs,
            tokenizer=tokenizer,
            sampling=True,
            stream=True,
        )
        # Drain the stream; join() avoids repeated string concatenation.
        generated_text = ''.join(res)

        if parse_type == "表格解析":
            cleaned_result = clean_markdown_output(generated_text)
            rendered_result = render_markdown_table(cleaned_result)
            output_format = "Markdown表格"
        elif parse_type == "公式解析":
            cleaned_result = clean_formula_output(generated_text)
            rendered_result = render_latex_formula(cleaned_result)
            output_format = "LaTeX公式"
        elif parse_type == "文本解析":
            cleaned_result = clean_text_output(generated_text)
            # More than one detected type means a table and/or formula is
            # embedded in the text.
            if len(detect_content_types(cleaned_result)) > 1:
                rendered_result = render_mixed_content(cleaned_result)
                output_format = "混合内容(文本+表格+公式)"
            else:
                rendered_result = (
                    "<div style='padding: 15px; white-space: pre-wrap; "
                    "font-family: Arial, sans-serif; line-height: 1.6;'>"
                    f"{html.escape(cleaned_result, quote=False)}</div>"
                )
                output_format = "纯文本"
        else:
            cleaned_result = generated_text.strip()
            rendered_result = (
                "<div style='padding: 15px; white-space: pre-wrap; font-family: monospace;'>"
                f"{html.escape(cleaned_result, quote=False)}</div>"
            )
            output_format = "原始输出"

        return rendered_result, cleaned_result, f"解析完成 - 输出格式: {output_format}"
    except Exception as e:
        # UI boundary: report the failure in the page instead of crashing the
        # request handler.
        error_html = (
            "<div style='color: red; padding: 15px; border: 1px solid red; border-radius: 4px;'>"
            f"解析失败: {html.escape(str(e), quote=False)}</div>"
        )
        return error_html, str(e), "错误"
def create_interface():
    """Build the Gradio Blocks UI and return it (not yet launched).

    Layout: an image upload, a parse-type selector and a button on the left;
    a status box, a rendered-HTML view and a hidden raw-text box on the right;
    example inputs and usage notes below.

    MathJax is loaded through the page ``<head>``. Because results are injected
    after page load, a typeset pass is triggered in the browser after every
    parse.

    Returns:
        gr.Blocks: The assembled interface.
    """
    css = """
    .gradio-container {
        font-family: 'Helvetica Neue', Arial, sans-serif;
    }
    .output-text {
        font-family: 'Courier New', monospace;
        font-size: 14px;
    }
    .rendered-output {
        font-family: Arial, sans-serif;
        line-height: 1.6;
    }
    .rendered-output table {
        border-collapse: collapse;
        width: 100%;
        margin: 10px 0;
    }
    .rendered-output th, .rendered-output td {
        border: 1px solid #ddd;
        padding: 8px 12px;
        text-align: left;
    }
    .rendered-output th {
        background-color: #f2f2f2;
        font-weight: bold;
    }
    .rendered-output tr:nth-child(even) {
        background-color: #f9f9f9;
    }
    """

    # Raw string: the JavaScript must receive the two-character sequences
    # '\\(' / '\\[' so that MathJax gets "\(" / "\[" as delimiters. In a normal
    # Python string they collapse to '\(' which JavaScript reads as a bare "(",
    # turning every parenthesis on the page into a math delimiter.
    # The former polyfill.io <script> was removed: that domain is known to
    # have served malicious code, and MathJax 3 does not need the polyfill in
    # current browsers.
    mathjax_config = r"""
    <script>
    window.MathJax = {
        tex: {
            inlineMath: [['$', '$'], ['\\(', '\\)']],
            displayMath: [['$$', '$$'], ['\\[', '\\]']],
            processEscapes: true,
            processEnvironments: true
        },
        options: {
            ignoreHtmlClass: 'tex2jax_ignore',
            processHtmlClass: 'tex2jax_process'
        }
    };
    </script>
    <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
    """

    # Runs in the browser after a parse finishes. The short delay lets the
    # HTML component update its DOM before MathJax scans it.
    typeset_js = (
        "() => { setTimeout(() => { "
        "if (window.MathJax && window.MathJax.typesetPromise) { window.MathJax.typesetPromise(); } "
        "}, 50); }"
    )

    with gr.Blocks(css=css, title="MiniCPM 多模态内容解析工具", analytics_enabled=False, head=mathjax_config) as interface:
        gr.Markdown("""
        # 🚀 MiniCPM 多模态内容解析工具
        基于MiniCPM-o多模态模型的智能图片内容解析工具,支持表格、公式、文本三种解析模式。
        ## 📋 使用说明
        1. **上传图片**: 支持 PNG、JPG、JPEG 等格式
        2. **选择解析类型**: 根据图片内容选择相应的解析模式
        3. **获取结果**: 自动渲染显示,表格和公式直接可视化
        ## 🎯 解析类型说明
        - **📊 表格解析**: 将表格图片转换为可视化表格
        - **🧮 公式解析**: 识别数学公式并渲染显示
        - **📝 文本解析**: 提取图片中的所有文字内容,智能识别并渲染其中的表格和公式
        """)

        with gr.Row():
            with gr.Column(scale=1):
                # Inputs
                image_input = gr.Image(
                    label="📷 上传图片",
                    type="pil",
                    height=400,
                )
                parse_type = gr.Radio(
                    choices=["表格解析", "公式解析", "文本解析"],
                    value="表格解析",
                    label="🎛️ 选择解析类型",
                    info="根据图片内容选择合适的解析模式",
                )
                parse_button = gr.Button(
                    "🔍 开始解析",
                    variant="primary",
                    size="lg",
                )
            with gr.Column(scale=1):
                # Outputs
                status_output = gr.Textbox(
                    label="📊 解析状态",
                    value="等待上传图片...",
                    interactive=False,
                )
                result_output = gr.HTML(
                    label="📄 解析结果(渲染视图)",
                    value="<p style='color: #666; text-align: center; padding: 20px;'>解析结果将在这里显示...</p>",
                    elem_classes=["rendered-output"],
                )
                # Hidden, but still receives the raw text on every parse.
                raw_output = gr.Textbox(
                    label="📝 原始代码(可复制)",
                    lines=8,
                    max_lines=15,
                    show_copy_button=True,
                    elem_classes=["output-text"],
                    placeholder="原始Markdown/LaTeX代码将在这里显示...",
                    visible=False,
                )

        # Example inputs
        gr.Markdown("## 📖 示例图片")
        with gr.Row():
            gr.Examples(
                examples=[
                    ["./table.png", "表格解析"],
                    ["./formulas.png", "公式解析"],
                    ["./text.png", "文本解析"],
                ],
                inputs=[image_input, parse_type],
                label="点击示例快速体验",
                cache_examples=False,
            )

        # Event wiring: parse, then ask MathJax to typeset the new content.
        parse_event = parse_button.click(
            fn=parse_image,
            inputs=[image_input, parse_type],
            outputs=[result_output, raw_output, status_output],
        )
        parse_event.then(fn=None, inputs=None, outputs=None, js=typeset_js)

        # Footer
        gr.Markdown("""
        ---
        ### 💡 使用提示
        - 确保图片清晰,内容结构明显
        - 复杂表格建议分段处理
        - 公式图片建议使用高分辨率
        - 文字图片避免模糊、倾斜或光线不足
        - **文本解析**现在支持智能识别:如果文本中包含表格或公式,会自动渲染显示
        ### 🔧 技术支持
        - 模型: MiniCPM-o-2.6
        - 框架: Gradio + Transformers
        - GPU: CUDA加速推理
        """)
    return interface
if __name__ == "__main__":
    # The model is not loaded at startup; it is loaded on the first request.
    for banner in ("🚀 启动MiniCPM多模态内容解析工具", "📝 模型将在首次使用时自动加载"):
        print(banner)

    launch_options = {
        "server_name": "0.0.0.0",  # listen on all interfaces
        "server_port": 7860,       # port used on Hugging Face Spaces
        "share": False,            # no public share link
        "show_error": True,        # surface error details in the UI
        "quiet": False,            # keep startup messages
        "debug": False,            # debug mode off
    }

    # Build the UI, then start serving it.
    interface = create_interface()
    interface.launch(**launch_options)