#!/usr/bin/env bash
# Test script for TEI with the converted ONNX model.
# Prints the model directory contents and the recommended environment
# variables for running Qwen3-Embedding-0.6B (INT8 ONNX) with TEI on CPU.
set -euo pipefail

echo "Testing Qwen3-Embedding-0.6B INT8 ONNX with TEI..."
echo "This model is quantized for faster CPU inference"

# The script is expected to be run from inside the model directory.
MODEL_PATH=$(pwd)
echo "Model path: $MODEL_PATH"
echo "Files in model directory:"
ls -la "$MODEL_PATH"   # quoted: path may contain spaces (SC2086)
echo ""
echo "Expected performance improvement: 2-4x faster on CPU"
echo "Note: There may be a small accuracy drop (1-3%)"
echo ""
echo "To use this model with TEI:"
echo "1. Upload to HuggingFace Hub, or"
echo "2. Mount this directory in your TEI container"
echo "3. Update model-id in porter.yaml to point to this model"
echo ""
echo "For optimal CPU performance, set these environment variables:"
# Compute the core count once; the expanded value is embedded in the
# copy-pasteable export lines below (same output as inline $(nproc)).
cores=$(nproc)
echo "export OMP_NUM_THREADS=${cores} # Use all physical cores"
echo "export KMP_AFFINITY=granularity=fine,compact,1,0"
echo "export ORT_THREAD_POOL_SIZE=${cores}"