{
  "batch_size": 4,
  "accum_iter": 4,
  "epochs": 200,
  "warmup_epochs": 0.001,
  "lr": 2e-05,
  "min_lr": 0.0,
  "wd": 0.1,
  "clip_grad": 4.0,
  "init_from": "../../../pretrained_models/Lumina-DiMOO",
  "data_config": "../configs/all_data.yaml",
  "cache_ann_on_disk": true,
  "length_clustering": true,
  "num_workers": 16,
  "pin_mem": true,
  "seed": 42,
  "output_dir": "output/Lumina-DiMOO-TI2TI-Training-multi-all-data-b200",
  "save_interval": 1,
  "save_iteration_interval": 1000,
  "only_save_trainable": false,
  "ckpt_max_keep": 2,
  "auto_resume": true,
  "resume_path": null,
  "model_parallel_size": 1,
  "data_parallel": "fsdp",
  "precision": "bf16",
  "grad_precision": "fp32",
  "checkpointing": true,
  "max_seq_len": 5120,
  "dropout": 0.05,
  "world_size": 16,
  "rank": 0,
  "gpu": 0,
  "local_rank": 0,
  "dist_url": "env://",
  "distributed": true,
  "dist_backend": "nccl"
}