#!/usr/bin/env bash
# Test script for TEI with the converted ONNX model.
# Prints the model directory contents and the recommended environment
# variables for running Qwen3-Embedding-0.6B (INT8 ONNX) with TEI on CPU.
set -euo pipefail

echo "Testing Qwen3-Embedding-0.6B INT8 ONNX with TEI..."
echo "This model is quantized for faster CPU inference"

# The script is expected to be run from inside the model directory.
MODEL_PATH=$(pwd)
echo "Model path: $MODEL_PATH"
echo "Files in model directory:"
ls -la "$MODEL_PATH"   # quoted: path may contain spaces (SC2086)
echo ""
echo "Expected performance improvement: 2-4x faster on CPU"
echo "Note: There may be a small accuracy drop (1-3%)"
echo ""
echo "To use this model with TEI:"
echo "1. Upload to HuggingFace Hub, or"
echo "2. Mount this directory in your TEI container"
echo "3. Update model-id in porter.yaml to point to this model"
echo ""
echo "For optimal CPU performance, set these environment variables:"
# Compute the core count once; the expanded value is embedded in the
# copy-pasteable export lines below (same output as inline $(nproc)).
cores=$(nproc)
echo "export OMP_NUM_THREADS=${cores} # Use all physical cores"
echo "export KMP_AFFINITY=granularity=fine,compact,1,0"
echo "export ORT_THREAD_POOL_SIZE=${cores}"