# File-viewer header (not part of the configuration): File size: 4,242 Bytes; 11a4bf9
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Composition defaults: select the `local` execution option and the `none`
# deployment option; `_self_` is listed last.
defaults:
  - execution: local
  - deployment: none
  - _self_
# Execution settings: where results are written, plus the mounts and
# environment variables declared for the evaluation step.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# confirm `mounts` and `env_vars` are both direct children of `execution`.
execution:
  output_dir: ./results_nvidia_nemotron_3_nano_30b_a3b
  mounts:
    evaluation:
      # Presumably host path -> container path; verify against the launcher.
      ./hf_cache: /root/.cache/huggingface
  env_vars:
    # Explicitly empty mapping (a bare `evaluation:` would parse as null).
    evaluation: {}
# Endpoint of the model under evaluation.
target:
  api_endpoint:
    model_id: nvidia/nemotron-nano-3-30b-a3b
    url: https://integrate.api.nvidia.com/v1/chat/completions
    # Name of the variable holding the key, not the key itself.
    api_key_name: NGC_API_KEY  # API Key with access to build.nvidia.com
# Evaluation settings: environment variables, evaluator settings declared at
# the evaluation level, and the list of benchmark tasks.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# confirm the placement of `extra`, `target`, and `judge` against the
# evaluator's configuration schema.
evaluation:
  # Each value repeats its key; presumably it names a host variable to forward
  # rather than holding a literal secret — confirm against the launcher.
  env_vars:
    HF_TOKEN: HF_TOKEN
    JUDGE_API_KEY: JUDGE_API_KEY  # API Key with access to gpt-4o for HLE
    HF_HOME: HF_HOME

  # Evaluation-level evaluator settings. Tasks below restate only some of
  # these keys (presumably as per-task overrides — TODO confirm merge rules).
  nemo_evaluator_config:
    config:
      params:
        max_new_tokens: 131072
        temperature: 0.99999
        top_p: 0.99999
        parallelism: 512
        request_timeout: 3600
        max_retries: 10
        extra:
          tokenizer: NVIDIA-Nemotron-Nano-3-30B-A3B-BF16
          tokenizer_backend: huggingface
    target:
      api_endpoint:
        adapter_config:
          use_caching: true
          tracking_requests_stats: true
          log_failed_requests: true
          use_request_logging: true
          max_logged_requests: 10
          use_response_logging: true
          max_logged_responses: 10

  tasks:
    # ns_bfcl_v3 sets its own sampling params and parallelism, and is the only
    # task that sets adapter caching to false.
    - name: ns_bfcl_v3
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            temperature: 0.6
            top_p: 0.95
            parallelism: 32
            extra:
              num_repeats: 1
              args: "++use_client_parsing=False"
        target:
          api_endpoint:
            adapter_config:
              use_caching: false

    # ns_bfcl_v4 additionally sets its own max_new_tokens.
    - name: ns_bfcl_v4
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            max_new_tokens: 8192
            parallelism: 128
            temperature: 0.6
            top_p: 0.95
            extra:
              num_repeats: 1
              args: "++use_client_parsing=False"

    - name: ns_livecodebench
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 8
              dataset_split: test_v5_2407_2412

    - name: ns_mmlu_pro
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 1
              args: "++prompt_config=eval/aai/mcq-10choices-boxed"

    - name: ns_gpqa
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 8
              args: "++prompt_config=eval/aai/mcq-4choices"

    - name: ns_aime2025
      env_vars:
        HF_TOKEN: HF_TOKEN
        JUDGE_API_KEY: JUDGE_API_KEY
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 64
              args: "++prompt_config=/prompt_templates/math-oai.yaml"

    - name: ns_scicode
      env_vars:
        HF_TOKEN: HF_TOKEN
        JUDGE_API_KEY: JUDGE_API_KEY
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 8

    - name: ns_ifbench
      env_vars:
        HF_TOKEN: HF_TOKEN
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 8

    # ns_hle is the only task that configures a judge model.
    - name: ns_hle
      env_vars:
        HF_TOKEN: HF_TOKEN
        JUDGE_API_KEY: JUDGE_API_KEY
      nemo_evaluator_config:
        config:
          params:
            extra:
              num_repeats: 1
              judge_support: true
              judge:
                parallelism: 16
                model_id: openai/gpt-4o
                # Placeholder: replace with the judge endpoint URL before use.
                url: "<OPENAI_API_URL_FOR_JUDGE>"
                # Presumably the name of the variable holding the judge key
                # (matches JUDGE_API_KEY in env_vars) — confirm.
                api_key: JUDGE_API_KEY