Commit 9e38f34
Parent(s): a8f90b0
feat: refactor model configuration to use unified MODELS_MAP with backend routing
Rename OPENAI_PRICING to MODELS_MAP and add a backend field to each model configuration for dynamic backend selection. Update the model routing logic to use the backend field instead of hardcoded model-name checks.
- Rename OPENAI_PRICING to MODELS_MAP in common.py
- Add "backend" field to all model configurations (openai/gemini/olmocr)
- Update process_document() to route based on MODELS_MAP[model_choice]["backend"]
- Update model selector choices in app.py to list(MODELS_MAP.keys())
- app.py +6 -6
- common.py +35 -35
- olm_ocr.py +2 -2
- openai_backend.py +2 -2
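With the unified map, adding another model that goes through the OpenAI backend is a one-line configuration change: the dropdown is built from MODELS_MAP.keys() and the routing reads the entry's backend field. A brand-new backend still needs one extra branch in process_document(). A minimal sketch of both cases, assuming this commit's structure (the model name, the "claude" backend string, and _run_claude_vision below are hypothetical, not part of this repo):

    # Hypothetical: a new OpenAI-backed model only needs a MODELS_MAP entry;
    # the dropdown choices and the routing pick it up automatically.
    MODELS_MAP["gpt-5.3-mini"] = {"input": 0.25, "output": 2.00, "backend": "openai"}

    # Hypothetical: a brand-new backend needs one entry plus one elif in process_document():
    #     elif MODELS_MAP[model_choice]["backend"] == "claude":
    #         csv_text = _run_claude_vision(image, prompt)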
app.py CHANGED

@@ -14,7 +14,7 @@ from PIL import Image
 from olmocr.data.renderpdf import render_pdf_to_base64png

 from openai_backend import _run_openai_vision
-from common import …
+from common import MODELS_MAP, MODEL_GEMINI, MODEL_OLMOCR
 from logging_helper import log as _log, log_debug as _log_debug
 from olm_ocr import _run_olmocr

@@ -118,12 +118,12 @@ def process_document(file_obj, model_choice: str, prompt: str):
         "You are an OCR-to-CSV assistant. Read the table or structured text in the image and output a valid "
         "CSV representation. Use commas as separators and include a header row if appropriate."
     )
-    …
+    _log_debug(f"Using model: {model_choice}")
-    if model_choice …
+    if MODELS_MAP[model_choice]["backend"] == "openai":
         csv_text = _run_openai_vision(image, prompt, model_choice)
-    elif model_choice == …
+    elif MODELS_MAP[model_choice]["backend"] == "gemini":
         csv_text = _run_gemini_vision(image, prompt)
-    elif model_choice == …
+    elif MODELS_MAP[model_choice]["backend"] == "olmocr":
         csv_text = _run_olmocr(image, prompt)
     else:
         csv_text = f"Unknown model choice: {model_choice}"

@@ -148,7 +148,7 @@ def build_interface() -> gr.Blocks:

     model_selector = gr.Dropdown(
         label="LLM backend",
-        choices=list(…
+        choices=list(MODELS_MAP.keys()),
         value=MODEL_OLMOCR,
     )
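One behavioral note on the new routing: MODELS_MAP[model_choice]["backend"] raises KeyError for a model_choice that is not in the map, whereas the old name checks would have fallen through to the "Unknown model choice" branch. The dropdown only offers MODELS_MAP keys, so this cannot happen from the UI; if process_document() should also tolerate direct callers, a defensive variant (a sketch, not part of this commit) could be:

    # Look up the backend without raising, so unknown choices still hit the error branch.
    backend = MODELS_MAP.get(model_choice, {}).get("backend")
    if backend == "openai":
        csv_text = _run_openai_vision(image, prompt, model_choice)
    elif backend == "gemini":
        csv_text = _run_gemini_vision(image, prompt)
    elif backend == "olmocr":
        csv_text = _run_olmocr(image, prompt)
    else:
        csv_text = f"Unknown model choice: {model_choice}"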
common.py CHANGED

@@ -2,52 +2,52 @@ MODEL_GEMINI = "Gemini 3 Pro"
 MODEL_OLMOCR = "olmOCR-2-7B-1025-FP8"


-OPENAI_PRICING = {
+MODELS_MAP = {
     # GPT-5.2 family
-    "gpt-5.2": {"input": 1.75, "output": 14.00},
+    "gpt-5.2": {"input": 1.75, "output": 14.00, "backend": "openai"},
-    "gpt-5.2-chat-latest": {"input": 1.75, "output": 14.00},
+    "gpt-5.2-chat-latest": {"input": 1.75, "output": 14.00, "backend": "openai"},
-    "gpt-5.2-pro": {"input": 21.00, "output": 168.00},
+    "gpt-5.2-pro": {"input": 21.00, "output": 168.00, "backend": "openai"},

     # GPT-5.1 / GPT-5 family
-    "gpt-5.1": {"input": 1.25, "output": 10.00},
+    "gpt-5.1": {"input": 1.25, "output": 10.00, "backend": "openai"},
-    "gpt-5": {"input": 1.25, "output": 10.00},
+    "gpt-5": {"input": 1.25, "output": 10.00, "backend": "openai"},
-    "gpt-5-mini": {"input": 0.25, "output": 2.00},
+    "gpt-5-mini": {"input": 0.25, "output": 2.00, "backend": "openai"},
-    "gpt-5-nano": {"input": 0.05, "output": 0.40},
+    "gpt-5-nano": {"input": 0.05, "output": 0.40, "backend": "openai"},
-    "gpt-5.1-chat-latest": {"input": 1.25, "output": 10.00},
+    "gpt-5.1-chat-latest": {"input": 1.25, "output": 10.00, "backend": "openai"},
-    "gpt-5-chat-latest": {"input": 1.25, "output": 10.00},
+    "gpt-5-chat-latest": {"input": 1.25, "output": 10.00, "backend": "openai"},
-    "gpt-5.1-codex-max": {"input": 1.25, "output": 10.00},
+    "gpt-5.1-codex-max": {"input": 1.25, "output": 10.00, "backend": "openai"},
-    "gpt-5.1-codex": {"input": 1.25, "output": 10.00},
+    "gpt-5.1-codex": {"input": 1.25, "output": 10.00, "backend": "openai"},
-    "gpt-5-codex": {"input": 1.25, "output": 10.00},
+    "gpt-5-codex": {"input": 1.25, "output": 10.00, "backend": "openai"},
-    "gpt-5.1-codex-mini": {"input": 0.25, "output": 2.00},
+    "gpt-5.1-codex-mini": {"input": 0.25, "output": 2.00, "backend": "openai"},
-    "gpt-5-pro": {"input": 15.00, "output": 120.00},
+    "gpt-5-pro": {"input": 15.00, "output": 120.00, "backend": "openai"},
-    "gpt-5-search-api": {"input": 1.25, "output": 10.00},
+    "gpt-5-search-api": {"input": 1.25, "output": 10.00, "backend": "openai"},

     # GPT-4.1 family
-    "gpt-4.1": {"input": 2.00, "output": 8.00},
+    "gpt-4.1": {"input": 2.00, "output": 8.00, "backend": "openai"},
-    "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
+    "gpt-4.1-mini": {"input": 0.40, "output": 1.60, "backend": "openai"},
-    "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
+    "gpt-4.1-nano": {"input": 0.10, "output": 0.40, "backend": "openai"},

     # GPT-4o family
-    "gpt-4o": {"input": 2.50, "output": 10.00},
+    "gpt-4o": {"input": 2.50, "output": 10.00, "backend": "openai"},
-    "gpt-4o-2024-05-13": {"input": 5.00, "output": 15.00},
+    "gpt-4o-2024-05-13": {"input": 5.00, "output": 15.00, "backend": "openai"},
-    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
+    "gpt-4o-mini": {"input": 0.15, "output": 0.60, "backend": "openai"},
-    "chatgpt-4o-latest": {"input": 5.00, "output": 15.00},
+    "chatgpt-4o-latest": {"input": 5.00, "output": 15.00, "backend": "openai"},

     # GPT-4 Turbo / GPT-4 legacy family (from legacy models table)
-    "gpt-4-turbo": {"input": 10.00, "output": 30.00},
+    "gpt-4-turbo": {"input": 10.00, "output": 30.00, "backend": "openai"},
-    "gpt-4-turbo-2024-04-09": {"input": 10.00, "output": 30.00},
+    "gpt-4-turbo-2024-04-09": {"input": 10.00, "output": 30.00, "backend": "openai"},
-    "gpt-4-0125-preview": {"input": 10.00, "output": 30.00},
+    "gpt-4-0125-preview": {"input": 10.00, "output": 30.00, "backend": "openai"},
-    "gpt-4-1106-preview": {"input": 10.00, "output": 30.00},
+    "gpt-4-1106-preview": {"input": 10.00, "output": 30.00, "backend": "openai"},
-    "gpt-4-1106-vision-preview": {"input": 10.00, "output": 30.00},
+    "gpt-4-1106-vision-preview": {"input": 10.00, "output": 30.00, "backend": "openai"},
-    "gpt-4-0613": {"input": 30.00, "output": 60.00},
+    "gpt-4-0613": {"input": 30.00, "output": 60.00, "backend": "openai"},
-    "gpt-4-0314": {"input": 30.00, "output": 60.00},
+    "gpt-4-0314": {"input": 30.00, "output": 60.00, "backend": "openai"},
-    "gpt-4": {"input": 30.00, "output": 60.00},
+    "gpt-4": {"input": 30.00, "output": 60.00, "backend": "openai"},
-    "gpt-4-32k": {"input": 60.00, "output": 120.00},
+    "gpt-4-32k": {"input": 60.00, "output": 120.00, "backend": "openai"},

     # Default
-    "default": {"input": 2.50, "output": 10.00},
+    "default": {"input": 2.50, "output": 10.00, "backend": "openai"},

     # Other backends (mock rates)
-    MODEL_GEMINI: {"input": 1.00, "output": 1.00},
+    MODEL_GEMINI: {"input": 1.00, "output": 1.00, "backend": "gemini"},
-    MODEL_OLMOCR: {"input": 1.35, "output": 0.30},
+    MODEL_OLMOCR: {"input": 1.35, "output": 0.30, "backend": "olmocr"},
 }
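The input/output figures read as USD per million tokens, matching OpenAI's published per-1M pricing, but the diff does not state the unit, so treat that as an assumption. Under that assumption, a minimal per-call cost lookup using the map's own "default" fallback could look like this (estimate_cost is a hypothetical helper, not defined in this repo):

    def estimate_cost(model_name: str, prompt_tokens: int, completion_tokens: int) -> float:
        # Assumes "input"/"output" are USD per 1M tokens; unknown models fall back to "default".
        rates = MODELS_MAP.get(model_name, MODELS_MAP["default"])
        return (prompt_tokens * rates["input"] + completion_tokens * rates["output"]) / 1_000_000

    # Example: estimate_cost("gpt-4o-mini", 1200, 300)
    #          = (1200 * 0.15 + 300 * 0.60) / 1_000_000 = 0.00036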
olm_ocr.py CHANGED

@@ -5,7 +5,7 @@ from PIL import Image
 from huggingface_hub import InferenceClient
 from image_utils import _pil_image_to_base64_jpeg
 from logging_helper import _log_model_response
-from common import …
+from common import MODELS_MAP


 MODEL_ID = "allenai/olmOCR-2-7B-1025-FP8"

@@ -56,7 +56,7 @@ def _run_olmocr(image: Image.Image, prompt: str) -> str:
         content=content,
         duration=duration,
         usage=completion.usage,
-        pricing=…
+        pricing=MODELS_MAP,
     )

     return content
openai_backend.py CHANGED

@@ -6,7 +6,7 @@ from typing import Optional
 from PIL import Image
 from logging_helper import log as _log, log_debug as _log_debug, _log_model_response
 from image_utils import _pil_image_to_base64_jpeg
-from common import …
+from common import MODELS_MAP

 try:
     from openai import OpenAI

@@ -56,7 +56,7 @@ def _run_openai_vision(image: Image.Image, prompt: str, model_name: str) -> str:
         content=content,
         duration=duration,
         usage=response.usage,
-        pricing=…
+        pricing=MODELS_MAP,
     )

     return content
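Both backends now pass the entire MODELS_MAP as the pricing= argument to _log_model_response, rather than a single model's rates, so the backend field travels along with the pricing data. Since every dropdown choice must resolve to one of the three handled backends, a small sanity check (a test sketch against this commit's structure, not part of the repo) would be:

    def test_models_map_entries():
        # Every model entry must carry a backend that process_document() can dispatch,
        # plus the input/output rates that the pricing logger expects.
        for name, cfg in MODELS_MAP.items():
            assert cfg["backend"] in {"openai", "gemini", "olmocr"}, name
            assert "input" in cfg and "output" in cfg, name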