{
  "best_metric": 0.443807452917099,
  "best_model_checkpoint": "CTCLLMs_self_tokenizer/checkpoints/LongSpeech_CTC-Shrink_augment_data_self_tokenizer_addMLS_projector_restore/checkpoint-30000",
  "epoch": 1.0,
  "eval_steps": 1000,
  "global_step": 31479,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0006353441977191143,
      "grad_norm": 45.06840896606445,
      "learning_rate": 3.597883597883598e-06,
      "loss": 72.1477,
      "step": 20
    },
    {
      "epoch": 0.0012706883954382287,
      "grad_norm": 56.45563507080078,
      "learning_rate": 7.830687830687831e-06,
      "loss": 71.8917,
      "step": 40
    },
    {
      "epoch": 0.001906032593157343,
      "grad_norm": 62.59088897705078,
      "learning_rate": 1.1851851851851853e-05,
      "loss": 71.7764,
      "step": 60
    },
    {
      "epoch": 0.0025413767908764573,
      "grad_norm": 75.64707946777344,
      "learning_rate": 1.6084656084656086e-05,
      "loss": 70.9277,
      "step": 80
    },
    {
      "epoch": 0.003176720988595572,
      "grad_norm": 73.5933837890625,
      "learning_rate": 2.031746031746032e-05,
      "loss": 68.0688,
      "step": 100
    },
    {
      "epoch": 0.003812065186314686,
      "grad_norm": 77.9434814453125,
      "learning_rate": 2.4550264550264552e-05,
      "loss": 65.4844,
      "step": 120
    },
    {
      "epoch": 0.004447409384033801,
      "grad_norm": 81.92144775390625,
      "learning_rate": 2.8783068783068785e-05,
      "loss": 61.2486,
      "step": 140
    },
    {
      "epoch": 0.005082753581752915,
      "grad_norm": 91.82105255126953,
      "learning_rate": 3.3015873015873014e-05,
      "loss": 55.9783,
      "step": 160
    },
    {
      "epoch": 0.005718097779472029,
      "grad_norm": 103.17108917236328,
      "learning_rate": 3.724867724867725e-05,
      "loss": 51.7487,
      "step": 180
    },
    {
      "epoch": 0.006353441977191144,
      "grad_norm": 98.97240447998047,
      "learning_rate": 4.148148148148148e-05,
      "loss": 45.0213,
      "step": 200
    },
    {
      "epoch": 0.006988786174910258,
      "grad_norm": 81.4900894165039,
      "learning_rate": 4.5714285714285716e-05,
      "loss": 38.3125,
      "step": 220
    },
    {
      "epoch": 0.007624130372629372,
      "grad_norm": 71.47420501708984,
      "learning_rate": 4.9947089947089946e-05,
      "loss": 33.2395,
      "step": 240
    },
    {
      "epoch": 0.008259474570348486,
      "grad_norm": 63.618309020996094,
      "learning_rate": 5.417989417989419e-05,
      "loss": 28.4421,
      "step": 260
    },
    {
      "epoch": 0.008894818768067601,
      "grad_norm": 58.004974365234375,
      "learning_rate": 5.841269841269842e-05,
      "loss": 25.048,
      "step": 280
    },
    {
      "epoch": 0.009530162965786714,
      "grad_norm": 46.489200592041016,
      "learning_rate": 6.264550264550265e-05,
      "loss": 21.9312,
      "step": 300
    },
    {
      "epoch": 0.01016550716350583,
      "grad_norm": 37.90148162841797,
      "learning_rate": 6.687830687830688e-05,
      "loss": 19.0696,
      "step": 320
    },
    {
      "epoch": 0.010800851361224944,
      "grad_norm": 36.47368240356445,
      "learning_rate": 7.111111111111112e-05,
      "loss": 17.0151,
      "step": 340
    },
    {
      "epoch": 0.011436195558944057,
      "grad_norm": 32.80181884765625,
      "learning_rate": 7.534391534391536e-05,
      "loss": 15.5522,
      "step": 360
    },
    {
      "epoch": 0.012071539756663172,
      "grad_norm": 25.543760299682617,
      "learning_rate": 7.957671957671958e-05,
      "loss": 14.1982,
      "step": 380
    },
    {
      "epoch": 0.012706883954382287,
      "grad_norm": 22.31871223449707,
      "learning_rate": 8.380952380952382e-05,
      "loss": 13.2314,
      "step": 400
    },
    {
      "epoch": 0.0133422281521014,
      "grad_norm": 18.374950408935547,
      "learning_rate": 8.804232804232805e-05,
      "loss": 12.4637,
      "step": 420
    },
    {
      "epoch": 0.013977572349820515,
      "grad_norm": 18.497610092163086,
      "learning_rate": 9.227513227513229e-05,
      "loss": 11.9765,
      "step": 440
    },
    {
      "epoch": 0.01461291654753963,
      "grad_norm": 14.529912948608398,
      "learning_rate": 9.650793650793651e-05,
      "loss": 11.2678,
      "step": 460
    },
    {
      "epoch": 0.015248260745258743,
      "grad_norm": 12.937056541442871,
      "learning_rate": 0.00010074074074074073,
      "loss": 10.6223,
      "step": 480
    },
    {
      "epoch": 0.015883604942977858,
      "grad_norm": 12.284934043884277,
      "learning_rate": 0.00010497354497354497,
      "loss": 10.189,
      "step": 500
    },
    {
      "epoch": 0.016518949140696973,
      "grad_norm": 9.824132919311523,
      "learning_rate": 0.0001092063492063492,
      "loss": 9.8138,
      "step": 520
    },
    {
      "epoch": 0.017154293338416088,
      "grad_norm": 8.129488945007324,
      "learning_rate": 0.00011343915343915343,
      "loss": 9.4242,
      "step": 540
    },
    {
      "epoch": 0.017789637536135203,
      "grad_norm": 9.27999496459961,
      "learning_rate": 0.00011767195767195766,
      "loss": 9.1365,
      "step": 560
    },
    {
      "epoch": 0.018424981733854314,
      "grad_norm": 5.250537872314453,
      "learning_rate": 0.00012190476190476193,
      "loss": 8.8276,
      "step": 580
    },
    {
      "epoch": 0.01906032593157343,
      "grad_norm": 5.430091381072998,
      "learning_rate": 0.00012613756613756615,
      "loss": 8.5892,
      "step": 600
    },
    {
      "epoch": 0.019695670129292544,
      "grad_norm": 3.3930234909057617,
      "learning_rate": 0.0001303703703703704,
      "loss": 8.3652,
      "step": 620
    },
    {
      "epoch": 0.02033101432701166,
      "grad_norm": 2.841287136077881,
      "learning_rate": 0.00013460317460317462,
      "loss": 8.1527,
      "step": 640
    },
    {
      "epoch": 0.020966358524730774,
      "grad_norm": 2.188707113265991,
      "learning_rate": 0.00013883597883597885,
      "loss": 7.9891,
      "step": 660
    },
    {
      "epoch": 0.02160170272244989,
      "grad_norm": 2.6337716579437256,
      "learning_rate": 0.0001430687830687831,
      "loss": 7.8345,
      "step": 680
    },
    {
      "epoch": 0.022237046920169,
      "grad_norm": 1.7390124797821045,
      "learning_rate": 0.00014730158730158732,
      "loss": 7.6817,
      "step": 700
    },
    {
      "epoch": 0.022872391117888115,
      "grad_norm": 1.6422362327575684,
      "learning_rate": 0.00015153439153439154,
      "loss": 7.5748,
      "step": 720
    },
    {
      "epoch": 0.02350773531560723,
      "grad_norm": 1.6876453161239624,
      "learning_rate": 0.0001557671957671958,
      "loss": 7.3896,
      "step": 740
    },
    {
      "epoch": 0.024143079513326345,
      "grad_norm": 1.230586290359497,
      "learning_rate": 0.00016,
      "loss": 7.3337,
      "step": 760
    },
    {
      "epoch": 0.02477842371104546,
      "grad_norm": 1.2059415578842163,
      "learning_rate": 0.00016423280423280424,
      "loss": 7.2545,
      "step": 780
    },
    {
      "epoch": 0.025413767908764574,
      "grad_norm": 1.5651260614395142,
      "learning_rate": 0.00016846560846560849,
      "loss": 7.1927,
      "step": 800
    },
    {
      "epoch": 0.02604911210648369,
      "grad_norm": 2.234393358230591,
      "learning_rate": 0.0001726984126984127,
      "loss": 7.1617,
      "step": 820
    },
    {
      "epoch": 0.0266844563042028,
      "grad_norm": 1.6703732013702393,
      "learning_rate": 0.00017693121693121696,
      "loss": 7.093,
      "step": 840
    },
    {
      "epoch": 0.027319800501921915,
      "grad_norm": 0.796870231628418,
      "learning_rate": 0.00018116402116402118,
      "loss": 7.0105,
      "step": 860
    },
    {
      "epoch": 0.02795514469964103,
      "grad_norm": 1.0919573307037354,
      "learning_rate": 0.0001853968253968254,
      "loss": 6.9911,
      "step": 880
    },
    {
      "epoch": 0.028590488897360145,
      "grad_norm": 1.3225408792495728,
      "learning_rate": 0.00018962962962962965,
      "loss": 6.9353,
      "step": 900
    },
    {
      "epoch": 0.02922583309507926,
      "grad_norm": 0.9445711970329285,
      "learning_rate": 0.00019386243386243388,
      "loss": 6.9075,
      "step": 920
    },
    {
      "epoch": 0.029861177292798375,
      "grad_norm": 1.0021796226501465,
      "learning_rate": 0.0001980952380952381,
      "loss": 6.8545,
      "step": 940
    },
    {
      "epoch": 0.030496521490517486,
      "grad_norm": 1.147709608078003,
      "learning_rate": 0.00019999993595464,
      "loss": 6.8145,
      "step": 960
    },
    {
      "epoch": 0.0311318656882366,
      "grad_norm": 1.4438824653625488,
      "learning_rate": 0.00019999949134260042,
      "loss": 6.7156,
      "step": 980
    },
    {
      "epoch": 0.031767209885955716,
      "grad_norm": 1.4000093936920166,
      "learning_rate": 0.0001999986232924222,
      "loss": 6.6363,
      "step": 1000
    },
    {
      "epoch": 0.031767209885955716,
      "eval_loss": 6.87591028213501,
      "eval_runtime": 46.4669,
      "eval_samples_per_second": 58.17,
      "eval_steps_per_second": 29.096,
      "step": 1000
    },
    {
      "epoch": 0.03240255408367483,
      "grad_norm": 2.151993989944458,
      "learning_rate": 0.00019999733180778103,
      "loss": 6.5176,
      "step": 1020
    },
    {
      "epoch": 0.033037898281393946,
      "grad_norm": 1.611135721206665,
      "learning_rate": 0.00019999561689414561,
      "loss": 6.4132,
      "step": 1040
    },
    {
      "epoch": 0.03367324247911306,
      "grad_norm": 2.1010184288024902,
      "learning_rate": 0.00019999347855877755,
      "loss": 6.2465,
      "step": 1060
    },
    {
      "epoch": 0.034308586676832176,
      "grad_norm": 1.5021122694015503,
      "learning_rate": 0.0001999909168107314,
      "loss": 6.1662,
      "step": 1080
    },
    {
      "epoch": 0.03494393087455129,
      "grad_norm": 1.4672967195510864,
      "learning_rate": 0.0001999879316608547,
      "loss": 6.0509,
      "step": 1100
    },
    {
      "epoch": 0.035579275072270405,
      "grad_norm": 1.4146413803100586,
      "learning_rate": 0.0001999845231217877,
      "loss": 5.9012,
      "step": 1120
    },
    {
      "epoch": 0.03621461926998951,
      "grad_norm": 1.252382755279541,
      "learning_rate": 0.00019998069120796358,
      "loss": 5.815,
      "step": 1140
    },
    {
      "epoch": 0.03684996346770863,
      "grad_norm": 1.6317933797836304,
      "learning_rate": 0.0001999764359356082,
      "loss": 5.771,
      "step": 1160
    },
    {
      "epoch": 0.03748530766542774,
      "grad_norm": 1.2354493141174316,
      "learning_rate": 0.0001999717573227401,
      "loss": 5.6189,
      "step": 1180
    },
    {
      "epoch": 0.03812065186314686,
      "grad_norm": 1.1442275047302246,
      "learning_rate": 0.0001999666553891704,
      "loss": 5.5078,
      "step": 1200
    },
    {
      "epoch": 0.03875599606086597,
      "grad_norm": 1.3596833944320679,
      "learning_rate": 0.0001999611301565027,
      "loss": 5.4507,
      "step": 1220
    },
    {
      "epoch": 0.03939134025858509,
      "grad_norm": 1.5420782566070557,
      "learning_rate": 0.00019995518164813315,
      "loss": 5.3225,
      "step": 1240
    },
    {
      "epoch": 0.0400266844563042,
      "grad_norm": 2.335935354232788,
      "learning_rate": 0.00019994880988925007,
      "loss": 5.3398,
      "step": 1260
    },
    {
      "epoch": 0.04066202865402332,
      "grad_norm": 1.2030448913574219,
      "learning_rate": 0.00019994201490683406,
      "loss": 5.2367,
      "step": 1280
    },
    {
      "epoch": 0.04129737285174243,
      "grad_norm": 1.1881422996520996,
      "learning_rate": 0.00019993479672965783,
      "loss": 5.2073,
      "step": 1300
    },
    {
      "epoch": 0.04193271704946155,
      "grad_norm": 1.2961896657943726,
      "learning_rate": 0.00019992715538828609,
      "loss": 5.157,
      "step": 1320
    },
    {
      "epoch": 0.04256806124718066,
      "grad_norm": 0.9343932271003723,
      "learning_rate": 0.00019991909091507525,
      "loss": 5.0156,
      "step": 1340
    },
    {
      "epoch": 0.04320340544489978,
      "grad_norm": 0.9654686450958252,
      "learning_rate": 0.00019991060334417364,
      "loss": 5.054,
      "step": 1360
    },
    {
      "epoch": 0.04383874964261889,
      "grad_norm": 1.4537482261657715,
      "learning_rate": 0.00019990169271152098,
      "loss": 4.9824,
      "step": 1380
    },
    {
      "epoch": 0.044474093840338,
      "grad_norm": 1.0155112743377686,
      "learning_rate": 0.00019989235905484853,
      "loss": 4.8496,
      "step": 1400
    },
    {
      "epoch": 0.045109438038057115,
      "grad_norm": 0.8903729915618896,
      "learning_rate": 0.00019988260241367875,
      "loss": 4.8407,
      "step": 1420
    },
    {
      "epoch": 0.04574478223577623,
      "grad_norm": 1.0020333528518677,
      "learning_rate": 0.00019987242282932518,
      "loss": 4.7753,
      "step": 1440
    },
    {
      "epoch": 0.046380126433495344,
      "grad_norm": 1.2074095010757446,
      "learning_rate": 0.0001998618203448923,
      "loss": 4.6939,
      "step": 1460
    },
    {
      "epoch": 0.04701547063121446,
      "grad_norm": 2.5281686782836914,
      "learning_rate": 0.00019985079500527527,
      "loss": 4.6567,
      "step": 1480
    },
    {
      "epoch": 0.047650814828933574,
      "grad_norm": 1.257580280303955,
      "learning_rate": 0.00019983934685715982,
      "loss": 4.5615,
      "step": 1500
    },
    {
      "epoch": 0.04828615902665269,
      "grad_norm": 1.5581581592559814,
      "learning_rate": 0.00019982747594902203,
      "loss": 4.6081,
      "step": 1520
    },
    {
      "epoch": 0.048921503224371804,
      "grad_norm": 1.029440999031067,
      "learning_rate": 0.0001998151823311281,
      "loss": 4.491,
      "step": 1540
    },
    {
      "epoch": 0.04955684742209092,
      "grad_norm": 0.9729529023170471,
      "learning_rate": 0.0001998024660555342,
      "loss": 4.4692,
      "step": 1560
    },
    {
      "epoch": 0.050192191619810034,
      "grad_norm": 1.1230270862579346,
      "learning_rate": 0.00019978932717608613,
      "loss": 4.3839,
      "step": 1580
    },
    {
      "epoch": 0.05082753581752915,
      "grad_norm": 1.048663854598999,
      "learning_rate": 0.0001997757657484192,
      "loss": 4.3907,
      "step": 1600
    },
    {
      "epoch": 0.051462880015248263,
      "grad_norm": 1.2080233097076416,
      "learning_rate": 0.000199761781829958,
      "loss": 4.3147,
      "step": 1620
    },
    {
      "epoch": 0.05209822421296738,
      "grad_norm": 1.1026450395584106,
      "learning_rate": 0.000199747375479916,
      "loss": 4.2496,
      "step": 1640
    },
    {
      "epoch": 0.052733568410686486,
      "grad_norm": 1.037937879562378,
      "learning_rate": 0.00019973254675929554,
      "loss": 4.2614,
      "step": 1660
    },
    {
      "epoch": 0.0533689126084056,
      "grad_norm": 1.1000276803970337,
      "learning_rate": 0.00019971729573088742,
      "loss": 4.1367,
      "step": 1680
    },
    {
      "epoch": 0.054004256806124716,
      "grad_norm": 1.4259387254714966,
      "learning_rate": 0.0001997016224592706,
      "loss": 4.1126,
      "step": 1700
    },
    {
      "epoch": 0.05463960100384383,
      "grad_norm": 1.2918739318847656,
      "learning_rate": 0.00019968552701081203,
      "loss": 4.0945,
      "step": 1720
    },
    {
      "epoch": 0.055274945201562946,
      "grad_norm": 1.0148296356201172,
      "learning_rate": 0.00019966900945366634,
      "loss": 3.9981,
      "step": 1740
    },
    {
      "epoch": 0.05591028939928206,
      "grad_norm": 1.4177788496017456,
      "learning_rate": 0.0001996520698577755,
      "loss": 3.9247,
      "step": 1760
    },
    {
      "epoch": 0.056545633597001176,
      "grad_norm": 1.1384249925613403,
      "learning_rate": 0.00019963470829486858,
      "loss": 3.9204,
      "step": 1780
    },
    {
      "epoch": 0.05718097779472029,
      "grad_norm": 1.2175607681274414,
      "learning_rate": 0.0001996169248384615,
      "loss": 3.9023,
      "step": 1800
    },
    {
      "epoch": 0.057816321992439405,
      "grad_norm": 1.7040660381317139,
      "learning_rate": 0.0001995987195638565,
      "loss": 3.8349,
      "step": 1820
    },
    {
      "epoch": 0.05845166619015852,
      "grad_norm": 1.4229464530944824,
      "learning_rate": 0.0001995800925481421,
      "loss": 3.7969,
      "step": 1840
    },
    {
      "epoch": 0.059087010387877635,
      "grad_norm": 1.1412523984909058,
      "learning_rate": 0.0001995610438701925,
      "loss": 3.6494,
      "step": 1860
    },
    {
      "epoch": 0.05972235458559675,
      "grad_norm": 1.3119606971740723,
      "learning_rate": 0.00019954157361066764,
      "loss": 3.6137,
      "step": 1880
    },
    {
      "epoch": 0.06035769878331586,
      "grad_norm": 1.260469675064087,
      "learning_rate": 0.0001995216818520123,
      "loss": 3.5703,
      "step": 1900
    },
    {
      "epoch": 0.06099304298103497,
      "grad_norm": 1.6222745180130005,
      "learning_rate": 0.00019950136867845627,
      "loss": 3.4526,
      "step": 1920
    },
    {
      "epoch": 0.06162838717875409,
      "grad_norm": 1.399109125137329,
      "learning_rate": 0.00019948063417601369,
      "loss": 3.4467,
      "step": 1940
    },
    {
      "epoch": 0.0622637313764732,
      "grad_norm": 1.1804718971252441,
      "learning_rate": 0.00019945947843248276,
      "loss": 3.3017,
      "step": 1960
    },
    {
      "epoch": 0.06289907557419232,
      "grad_norm": 1.1146492958068848,
      "learning_rate": 0.0001994379015374455,
      "loss": 3.2564,
      "step": 1980
    },
    {
      "epoch": 0.06353441977191143,
      "grad_norm": 1.3201006650924683,
      "learning_rate": 0.00019941590358226713,
      "loss": 3.2076,
      "step": 2000
    },
    {
      "epoch": 0.06353441977191143,
      "eval_loss": 3.1886417865753174,
      "eval_runtime": 45.0925,
      "eval_samples_per_second": 59.943,
      "eval_steps_per_second": 29.983,
      "step": 2000
    },
    {
      "epoch": 0.06416976396963055,
      "grad_norm": 1.4352892637252808,
      "learning_rate": 0.00019939348466009588,
      "loss": 3.1246,
      "step": 2020
    },
    {
      "epoch": 0.06480510816734966,
      "grad_norm": 1.4391227960586548,
      "learning_rate": 0.0001993706448658625,
      "loss": 3.1187,
      "step": 2040
    },
    {
      "epoch": 0.06544045236506878,
      "grad_norm": 1.2951711416244507,
      "learning_rate": 0.0001993473842962798,
      "loss": 3.0175,
      "step": 2060
    },
    {
      "epoch": 0.06607579656278789,
      "grad_norm": 1.559552550315857,
      "learning_rate": 0.00019932370304984255,
      "loss": 2.8894,
      "step": 2080
    },
    {
      "epoch": 0.066711140760507,
      "grad_norm": 1.2822929620742798,
      "learning_rate": 0.00019929960122682655,
      "loss": 2.8483,
      "step": 2100
    },
    {
      "epoch": 0.06734648495822612,
      "grad_norm": 1.4227052927017212,
      "learning_rate": 0.00019927507892928873,
      "loss": 2.8691,
      "step": 2120
    },
    {
      "epoch": 0.06798182915594524,
      "grad_norm": 1.643660306930542,
      "learning_rate": 0.00019925013626106633,
      "loss": 2.8578,
      "step": 2140
    },
    {
      "epoch": 0.06861717335366435,
      "grad_norm": 1.1360414028167725,
      "learning_rate": 0.00019922477332777664,
      "loss": 2.7094,
      "step": 2160
    },
    {
      "epoch": 0.06925251755138347,
      "grad_norm": 1.224853277206421,
      "learning_rate": 0.00019919899023681658,
      "loss": 2.6953,
      "step": 2180
    },
    {
      "epoch": 0.06988786174910258,
      "grad_norm": 1.093682885169983,
      "learning_rate": 0.00019917278709736212,
      "loss": 2.6255,
      "step": 2200
    },
    {
      "epoch": 0.0705232059468217,
      "grad_norm": 1.238864779472351,
      "learning_rate": 0.00019914616402036796,
      "loss": 2.5893,
      "step": 2220
    },
    {
      "epoch": 0.07115855014454081,
      "grad_norm": 1.1016559600830078,
      "learning_rate": 0.00019911912111856688,
      "loss": 2.4743,
      "step": 2240
    },
    {
      "epoch": 0.07179389434225993,
      "grad_norm": 1.12881600856781,
      "learning_rate": 0.00019909165850646941,
      "loss": 2.5057,
      "step": 2260
    },
    {
      "epoch": 0.07242923853997903,
      "grad_norm": 1.216238021850586,
      "learning_rate": 0.00019906377630036338,
      "loss": 2.4624,
      "step": 2280
    },
    {
      "epoch": 0.07306458273769814,
      "grad_norm": 1.1429589986801147,
      "learning_rate": 0.00019903547461831323,
      "loss": 2.3835,
      "step": 2300
    },
    {
      "epoch": 0.07369992693541726,
      "grad_norm": 0.9367678165435791,
      "learning_rate": 0.00019900675358015967,
      "loss": 2.3971,
      "step": 2320
    },
    {
      "epoch": 0.07433527113313637,
      "grad_norm": 1.0869677066802979,
      "learning_rate": 0.00019897761330751922,
      "loss": 2.3241,
      "step": 2340
    },
    {
      "epoch": 0.07497061533085549,
      "grad_norm": 0.958840548992157,
      "learning_rate": 0.0001989480539237835,
      "loss": 2.2828,
      "step": 2360
    },
    {
      "epoch": 0.0756059595285746,
      "grad_norm": 0.9724891781806946,
      "learning_rate": 0.00019891807555411884,
      "loss": 2.2858,
      "step": 2380
    },
    {
      "epoch": 0.07624130372629372,
      "grad_norm": 1.045828104019165,
      "learning_rate": 0.00019888767832546572,
      "loss": 2.2949,
      "step": 2400
    },
    {
      "epoch": 0.07687664792401283,
      "grad_norm": 1.0283712148666382,
      "learning_rate": 0.0001988568623665383,
      "loss": 2.2034,
      "step": 2420
    },
    {
      "epoch": 0.07751199212173195,
      "grad_norm": 1.0930371284484863,
      "learning_rate": 0.00019882562780782376,
      "loss": 2.2283,
      "step": 2440
    },
    {
      "epoch": 0.07814733631945106,
      "grad_norm": 0.892132580280304,
      "learning_rate": 0.00019879397478158177,
      "loss": 2.1872,
      "step": 2460
    },
    {
      "epoch": 0.07878268051717018,
      "grad_norm": 1.0107035636901855,
      "learning_rate": 0.00019876190342184402,
      "loss": 2.1874,
      "step": 2480
    },
    {
      "epoch": 0.07941802471488929,
      "grad_norm": 1.1195555925369263,
      "learning_rate": 0.00019872941386441358,
      "loss": 2.0823,
      "step": 2500
    },
    {
      "epoch": 0.0800533689126084,
      "grad_norm": 1.2803888320922852,
      "learning_rate": 0.0001986965062468643,
      "loss": 2.0905,
      "step": 2520
    },
    {
      "epoch": 0.08068871311032752,
      "grad_norm": 1.0955703258514404,
      "learning_rate": 0.00019866318070854033,
      "loss": 2.0645,
      "step": 2540
    },
    {
      "epoch": 0.08132405730804664,
      "grad_norm": 1.117477297782898,
      "learning_rate": 0.00019862943739055536,
      "loss": 2.0259,
      "step": 2560
    },
    {
      "epoch": 0.08195940150576575,
      "grad_norm": 0.9660820960998535,
      "learning_rate": 0.0001985952764357923,
      "loss": 1.9881,
      "step": 2580
    },
    {
      "epoch": 0.08259474570348486,
      "grad_norm": 0.9186820983886719,
      "learning_rate": 0.0001985606979889023,
      "loss": 1.9571,
      "step": 2600
    },
    {
      "epoch": 0.08323008990120398,
      "grad_norm": 1.1236801147460938,
      "learning_rate": 0.00019852570219630445,
      "loss": 1.9506,
      "step": 2620
    },
    {
      "epoch": 0.0838654340989231,
      "grad_norm": 0.9719575047492981,
      "learning_rate": 0.0001984902892061851,
      "loss": 1.9359,
      "step": 2640
    },
    {
      "epoch": 0.08450077829664221,
      "grad_norm": 1.3401118516921997,
      "learning_rate": 0.00019845445916849704,
      "loss": 1.9707,
      "step": 2660
    },
    {
      "epoch": 0.08513612249436132,
      "grad_norm": 0.980446457862854,
      "learning_rate": 0.00019841821223495916,
      "loss": 1.88,
      "step": 2680
    },
    {
      "epoch": 0.08577146669208044,
      "grad_norm": 1.178143858909607,
      "learning_rate": 0.00019838154855905552,
      "loss": 1.8629,
      "step": 2700
    },
    {
      "epoch": 0.08640681088979955,
      "grad_norm": 0.9232170581817627,
      "learning_rate": 0.00019834446829603494,
      "loss": 1.8467,
      "step": 2720
    },
    {
      "epoch": 0.08704215508751867,
      "grad_norm": 1.7343891859054565,
      "learning_rate": 0.00019830697160291017,
      "loss": 1.8194,
      "step": 2740
    },
    {
      "epoch": 0.08767749928523778,
      "grad_norm": 0.878983199596405,
      "learning_rate": 0.0001982690586384573,
      "loss": 1.8232,
      "step": 2760
    },
    {
      "epoch": 0.0883128434829569,
      "grad_norm": 1.0917317867279053,
      "learning_rate": 0.00019823072956321513,
      "loss": 1.7668,
      "step": 2780
    },
    {
      "epoch": 0.088948187680676,
      "grad_norm": 1.0753387212753296,
      "learning_rate": 0.00019819198453948443,
      "loss": 1.7968,
      "step": 2800
    },
    {
      "epoch": 0.08958353187839511,
      "grad_norm": 1.0904388427734375,
      "learning_rate": 0.00019815282373132718,
      "loss": 1.7834,
      "step": 2820
    },
    {
      "epoch": 0.09021887607611423,
      "grad_norm": 0.9622576236724854,
      "learning_rate": 0.00019811324730456607,
      "loss": 1.7773,
      "step": 2840
    },
    {
      "epoch": 0.09085422027383334,
      "grad_norm": 0.8677240610122681,
      "learning_rate": 0.0001980732554267836,
      "loss": 1.7322,
      "step": 2860
    },
    {
      "epoch": 0.09148956447155246,
      "grad_norm": 1.0953987836837769,
      "learning_rate": 0.0001980328482673215,
      "loss": 1.7123,
      "step": 2880
    },
    {
      "epoch": 0.09212490866927157,
      "grad_norm": 1.0277127027511597,
      "learning_rate": 0.00019799202599727998,
      "loss": 1.7558,
      "step": 2900
    },
    {
      "epoch": 0.09276025286699069,
      "grad_norm": 1.1174383163452148,
      "learning_rate": 0.000197950788789517,
      "loss": 1.7222,
      "step": 2920
    },
    {
      "epoch": 0.0933955970647098,
      "grad_norm": 0.9651451706886292,
      "learning_rate": 0.00019790913681864747,
      "loss": 1.6652,
      "step": 2940
    },
    {
      "epoch": 0.09403094126242892,
      "grad_norm": 0.9669461250305176,
      "learning_rate": 0.00019786707026104265,
      "loss": 1.6381,
      "step": 2960
    },
    {
      "epoch": 0.09466628546014803,
      "grad_norm": 0.9406834244728088,
      "learning_rate": 0.0001978245892948293,
      "loss": 1.6276,
      "step": 2980
    },
    {
      "epoch": 0.09530162965786715,
      "grad_norm": 0.9768303632736206,
      "learning_rate": 0.0001977816940998889,
      "loss": 1.6071,
      "step": 3000
    },
    {
      "epoch": 0.09530162965786715,
      "eval_loss": 1.5878759622573853,
      "eval_runtime": 45.9166,
      "eval_samples_per_second": 58.868,
      "eval_steps_per_second": 29.445,
      "step": 3000
    },
    {
      "epoch": 0.09593697385558626,
      "grad_norm": 1.083208441734314,
      "learning_rate": 0.00019773838485785702,
      "loss": 1.6341,
      "step": 3020
    },
    {
      "epoch": 0.09657231805330538,
      "grad_norm": 0.9333330988883972,
      "learning_rate": 0.00019769466175212244,
      "loss": 1.5931,
      "step": 3040
    },
    {
      "epoch": 0.0972076622510245,
      "grad_norm": 0.9718533754348755,
      "learning_rate": 0.00019765052496782638,
      "loss": 1.5735,
      "step": 3060
    },
    {
      "epoch": 0.09784300644874361,
      "grad_norm": 1.2169800996780396,
      "learning_rate": 0.00019760597469186184,
      "loss": 1.5507,
      "step": 3080
    },
    {
      "epoch": 0.09847835064646272,
      "grad_norm": 0.9822967648506165,
      "learning_rate": 0.00019756101111287257,
      "loss": 1.5784,
      "step": 3100
    },
    {
      "epoch": 0.09911369484418184,
      "grad_norm": 0.9830970168113708,
      "learning_rate": 0.0001975156344212525,
      "loss": 1.5473,
      "step": 3120
    },
    {
      "epoch": 0.09974903904190095,
      "grad_norm": 0.8926035761833191,
      "learning_rate": 0.00019746984480914484,
      "loss": 1.5141,
      "step": 3140
    },
    {
      "epoch": 0.10038438323962007,
      "grad_norm": 0.8814927339553833,
      "learning_rate": 0.00019742364247044125,
      "loss": 1.5164,
      "step": 3160
    },
    {
      "epoch": 0.10101972743733918,
      "grad_norm": 0.8626115322113037,
      "learning_rate": 0.00019737702760078105,
      "loss": 1.4495,
      "step": 3180
    },
    {
      "epoch": 0.1016550716350583,
      "grad_norm": 1.0857669115066528,
      "learning_rate": 0.00019733000039755036,
      "loss": 1.511,
      "step": 3200
    },
    {
      "epoch": 0.10229041583277741,
      "grad_norm": 0.8834457397460938,
      "learning_rate": 0.00019728256105988132,
      "loss": 1.4764,
      "step": 3220
    },
    {
      "epoch": 0.10292576003049653,
      "grad_norm": 0.8241048455238342,
      "learning_rate": 0.00019723470978865118,
      "loss": 1.4253,
      "step": 3240
    },
    {
      "epoch": 0.10356110422821564,
      "grad_norm": 0.9844352006912231,
      "learning_rate": 0.00019718644678648158,
      "loss": 1.4595,
      "step": 3260
    },
    {
      "epoch": 0.10419644842593476,
      "grad_norm": 0.8982945084571838,
      "learning_rate": 0.00019713777225773745,
      "loss": 1.3535,
      "step": 3280
    },
    {
      "epoch": 0.10483179262365386,
      "grad_norm": 1.2204469442367554,
      "learning_rate": 0.0001970886864085263,
      "loss": 1.4283,
      "step": 3300
    },
    {
      "epoch": 0.10546713682137297,
      "grad_norm": 1.0676652193069458,
      "learning_rate": 0.00019703918944669754,
      "loss": 1.3858,
      "step": 3320
    },
    {
      "epoch": 0.10610248101909209,
      "grad_norm": 1.17191743850708,
      "learning_rate": 0.00019698928158184116,
      "loss": 1.4426,
      "step": 3340
    },
    {
      "epoch": 0.1067378252168112,
      "grad_norm": 0.9601316452026367,
      "learning_rate": 0.00019693896302528716,
      "loss": 1.3621,
      "step": 3360
    },
    {
      "epoch": 0.10737316941453032,
      "grad_norm": 0.9755037426948547,
      "learning_rate": 0.00019688823399010463,
      "loss": 1.3901,
      "step": 3380
    },
    {
      "epoch": 0.10800851361224943,
      "grad_norm": 1.0111849308013916,
      "learning_rate": 0.0001968370946911007,
      "loss": 1.3748,
      "step": 3400
    },
    {
      "epoch": 0.10864385780996855,
      "grad_norm": 0.8471179604530334,
      "learning_rate": 0.00019678554534481978,
      "loss": 1.3227,
      "step": 3420
    },
    {
      "epoch": 0.10927920200768766,
      "grad_norm": 0.9206441640853882,
      "learning_rate": 0.0001967335861695426,
      "loss": 1.3493,
      "step": 3440
    },
    {
      "epoch": 0.10991454620540678,
      "grad_norm": 1.055109977722168,
      "learning_rate": 0.0001966812173852852,
      "loss": 1.3549,
      "step": 3460
    },
    {
      "epoch": 0.11054989040312589,
      "grad_norm": 0.995614767074585,
      "learning_rate": 0.00019662843921379816,
      "loss": 1.3468,
      "step": 3480
    },
    {
      "epoch": 0.111185234600845,
      "grad_norm": 0.8873100876808167,
      "learning_rate": 0.0001965752518785655,
      "loss": 1.3129,
      "step": 3500
    },
    {
      "epoch": 0.11182057879856412,
      "grad_norm": 0.9802286624908447,
      "learning_rate": 0.00019652165560480383,
      "loss": 1.34,
      "step": 3520
    },
    {
      "epoch": 0.11245592299628324,
      "grad_norm": 0.9177120923995972,
      "learning_rate": 0.00019646765061946133,
      "loss": 1.3321,
      "step": 3540
    },
    {
      "epoch": 0.11309126719400235,
      "grad_norm": 1.0982646942138672,
      "learning_rate": 0.00019641323715121692,
      "loss": 1.292,
      "step": 3560
    },
    {
      "epoch": 0.11372661139172147,
      "grad_norm": 1.1567240953445435,
      "learning_rate": 0.00019635841543047918,
      "loss": 1.3052,
      "step": 3580
    },
    {
      "epoch": 0.11436195558944058,
      "grad_norm": 0.8516421914100647,
      "learning_rate": 0.00019630318568938528,
      "loss": 1.3189,
      "step": 3600
    },
    {
      "epoch": 0.1149972997871597,
      "grad_norm": 0.9710924029350281,
      "learning_rate": 0.00019624754816180022,
      "loss": 1.2644,
      "step": 3620
    },
    {
      "epoch": 0.11563264398487881,
      "grad_norm": 0.9252649545669556,
      "learning_rate": 0.00019619150308331572,
      "loss": 1.2517,
      "step": 3640
    },
    {
      "epoch": 0.11626798818259793,
      "grad_norm": 0.973948061466217,
      "learning_rate": 0.0001961350506912493,
      "loss": 1.2651,
      "step": 3660
    },
    {
      "epoch": 0.11690333238031704,
      "grad_norm": 0.9078177213668823,
      "learning_rate": 0.000196078191224643,
      "loss": 1.2089,
      "step": 3680
    },
    {
      "epoch": 0.11753867657803616,
      "grad_norm": 0.8456325531005859,
      "learning_rate": 0.0001960209249242628,
      "loss": 1.2503,
      "step": 3700
    },
    {
      "epoch": 0.11817402077575527,
      "grad_norm": 1.2014869451522827,
      "learning_rate": 0.00019596325203259722,
      "loss": 1.2287,
      "step": 3720
    },
    {
      "epoch": 0.11880936497347439,
      "grad_norm": 0.903296172618866,
      "learning_rate": 0.0001959051727938566,
      "loss": 1.1999,
      "step": 3740
    },
    {
      "epoch": 0.1194447091711935,
      "grad_norm": 0.9159349799156189,
      "learning_rate": 0.00019584668745397182,
      "loss": 1.2077,
      "step": 3760
    },
    {
      "epoch": 0.12008005336891261,
      "grad_norm": 1.0457518100738525,
      "learning_rate": 0.00019578779626059332,
      "loss": 1.2395,
      "step": 3780
    },
    {
      "epoch": 0.12071539756663172,
      "grad_norm": 0.8328551650047302,
      "learning_rate": 0.0001957284994630902,
      "loss": 1.2039,
      "step": 3800
    },
    {
      "epoch": 0.12135074176435083,
      "grad_norm": 0.9112881422042847,
      "learning_rate": 0.00019566879731254902,
      "loss": 1.1987,
      "step": 3820
    },
    {
      "epoch": 0.12198608596206995,
      "grad_norm": 2.0256752967834473,
      "learning_rate": 0.00019560869006177262,
      "loss": 1.1923,
      "step": 3840
    },
    {
      "epoch": 0.12262143015978906,
      "grad_norm": 0.9714537262916565,
      "learning_rate": 0.00019554817796527943,
      "loss": 1.1726,
      "step": 3860
    },
    {
      "epoch": 0.12325677435750818,
      "grad_norm": 0.8522310256958008,
      "learning_rate": 0.00019548726127930198,
      "loss": 1.1985,
      "step": 3880
    },
    {
      "epoch": 0.12389211855522729,
      "grad_norm": 0.8728988766670227,
      "learning_rate": 0.00019542594026178612,
      "loss": 1.1662,
      "step": 3900
    },
    {
      "epoch": 0.1245274627529464,
      "grad_norm": 0.9155168533325195,
      "learning_rate": 0.00019536421517238973,
      "loss": 1.1529,
      "step": 3920
    },
    {
      "epoch": 0.12516280695066553,
      "grad_norm": 1.05704665184021,
      "learning_rate": 0.0001953020862724817,
      "loss": 1.1415,
      "step": 3940
    },
    {
      "epoch": 0.12579815114838463,
      "grad_norm": 0.7793872952461243,
      "learning_rate": 0.0001952395538251408,
      "loss": 1.1387,
      "step": 3960
    },
    {
      "epoch": 0.12643349534610376,
      "grad_norm": 0.9358331561088562,
      "learning_rate": 0.00019517661809515465,
      "loss": 1.1816,
      "step": 3980
    },
    {
      "epoch": 0.12706883954382286,
      "grad_norm": 0.8175097107887268,
      "learning_rate": 0.00019511327934901846,
      "loss": 1.126,
      "step": 4000
    },
    {
      "epoch": 0.12706883954382286,
      "eval_loss": 1.1081569194793701,
      "eval_runtime": 128.6977,
      "eval_samples_per_second": 21.003,
      "eval_steps_per_second": 10.505,
      "step": 4000
    },
    {
      "epoch": 0.127704183741542,
      "grad_norm": 0.9568232893943787,
      "learning_rate": 0.000195049537854934,
      "loss": 1.1002,
      "step": 4020
    },
    {
      "epoch": 0.1283395279392611,
      "grad_norm": 0.9011651277542114,
      "learning_rate": 0.00019498539388280848,
      "loss": 1.129,
      "step": 4040
    },
    {
      "epoch": 0.1289748721369802,
      "grad_norm": 1.045811653137207,
      "learning_rate": 0.00019492084770425327,
      "loss": 1.0945,
      "step": 4060
    },
    {
      "epoch": 0.12961021633469932,
      "grad_norm": 0.8668608069419861,
      "learning_rate": 0.00019485589959258292,
      "loss": 1.0601,
      "step": 4080
    },
    {
      "epoch": 0.13024556053241843,
      "grad_norm": 0.9976728558540344,
      "learning_rate": 0.00019479054982281393,
      "loss": 1.1127,
      "step": 4100
    },
    {
      "epoch": 0.13088090473013755,
      "grad_norm": 0.9135074019432068,
      "learning_rate": 0.00019472479867166354,
      "loss": 1.0708,
      "step": 4120
    },
    {
      "epoch": 0.13151624892785665,
      "grad_norm": 0.8302998542785645,
      "learning_rate": 0.0001946586464175486,
      "loss": 1.0925,
      "step": 4140
    },
    {
      "epoch": 0.13215159312557578,
      "grad_norm": 0.9594709277153015,
      "learning_rate": 0.0001945920933405844,
      "loss": 1.0879,
      "step": 4160
    },
    {
      "epoch": 0.13278693732329488,
      "grad_norm": 1.3145122528076172,
      "learning_rate": 0.00019452513972258352,
      "loss": 1.0706,
      "step": 4180
    },
    {
      "epoch": 0.133422281521014,
      "grad_norm": 1.0521440505981445,
      "learning_rate": 0.00019445778584705452,
      "loss": 1.1089,
      "step": 4200
    },
    {
      "epoch": 0.13405762571873311,
      "grad_norm": 1.1046104431152344,
      "learning_rate": 0.00019439003199920088,
      "loss": 1.0965,
      "step": 4220
    },
    {
      "epoch": 0.13469296991645224,
      "grad_norm": 1.1228617429733276,
      "learning_rate": 0.00019432187846591967,
      "loss": 1.0747,
      "step": 4240
    },
    {
      "epoch": 0.13532831411417134,
      "grad_norm": 0.8399156332015991,
      "learning_rate": 0.00019425332553580044,
      "loss": 1.0239,
      "step": 4260
    },
    {
      "epoch": 0.13596365831189047,
      "grad_norm": 0.9118017554283142,
      "learning_rate": 0.00019418437349912385,
      "loss": 1.0557,
      "step": 4280
    },
    {
      "epoch": 0.13659900250960957,
      "grad_norm": 1.1154282093048096,
      "learning_rate": 0.00019411502264786069,
      "loss": 1.0846,
      "step": 4300
    },
    {
      "epoch": 0.1372343467073287,
      "grad_norm": 0.8457648158073425,
      "learning_rate": 0.00019404527327567035,
      "loss": 1.0438,
      "step": 4320
    },
    {
      "epoch": 0.1378696909050478,
      "grad_norm": 0.9336498975753784,
      "learning_rate": 0.0001939751256778998,
      "loss": 1.0403,
      "step": 4340
    },
    {
      "epoch": 0.13850503510276693,
      "grad_norm": 0.9318077564239502,
      "learning_rate": 0.0001939045801515822,
      "loss": 1.0375,
      "step": 4360
    },
    {
      "epoch": 0.13914037930048603,
      "grad_norm": 0.9146689176559448,
      "learning_rate": 0.0001938336369954358,
      "loss": 1.0394,
      "step": 4380
    },
    {
      "epoch": 0.13977572349820516,
      "grad_norm": 1.2244622707366943,
      "learning_rate": 0.00019376229650986245,
      "loss": 1.0305,
      "step": 4400
    },
    {
      "epoch": 0.14041106769592426,
      "grad_norm": 0.9721834659576416,
      "learning_rate": 0.00019369055899694652,
      "loss": 1.0133,
      "step": 4420
    },
    {
      "epoch": 0.1410464118936434,
      "grad_norm": 0.8538774251937866,
      "learning_rate": 0.00019361842476045356,
      "loss": 1.0272,
      "step": 4440
    },
    {
      "epoch": 0.1416817560913625,
      "grad_norm": 0.7733943462371826,
      "learning_rate": 0.000193545894105829,
      "loss": 1.0328,
      "step": 4460
    },
    {
      "epoch": 0.14231710028908162,
      "grad_norm": 1.0937755107879639,
      "learning_rate": 0.00019347296734019683,
      "loss": 1.0501,
      "step": 4480
    },
    {
      "epoch": 0.14295244448680072,
      "grad_norm": 0.8855345845222473,
      "learning_rate": 0.00019339964477235836,
      "loss": 0.9979,
      "step": 4500
    },
    {
      "epoch": 0.14358778868451985,
      "grad_norm": 0.9113184213638306,
      "learning_rate": 0.0001933259267127909,
      "loss": 0.967,
      "step": 4520
    },
    {
      "epoch": 0.14422313288223895,
      "grad_norm": 0.9671328663825989,
      "learning_rate": 0.00019325181347364643,
      "loss": 1.016,
      "step": 4540
    },
    {
      "epoch": 0.14485847707995805,
      "grad_norm": 0.8655368685722351,
      "learning_rate": 0.00019317730536875022,
      "loss": 1.0005,
      "step": 4560
    },
    {
      "epoch": 0.14549382127767718,
      "grad_norm": 0.8673165440559387,
      "learning_rate": 0.00019310240271359967,
      "loss": 0.9697,
      "step": 4580
    },
    {
      "epoch": 0.14612916547539628,
      "grad_norm": 1.0993086099624634,
      "learning_rate": 0.00019302710582536276,
      "loss": 0.9832,
      "step": 4600
    },
    {
      "epoch": 0.1467645096731154,
      "grad_norm": 1.1561827659606934,
      "learning_rate": 0.00019295141502287687,
      "loss": 0.9603,
      "step": 4620
    },
    {
      "epoch": 0.1473998538708345,
      "grad_norm": 1.0052567720413208,
      "learning_rate": 0.00019287533062664733,
      "loss": 0.9808,
      "step": 4640
    },
    {
      "epoch": 0.14803519806855364,
      "grad_norm": 0.9202858209609985,
      "learning_rate": 0.00019279885295884618,
      "loss": 0.9564,
      "step": 4660
    },
    {
      "epoch": 0.14867054226627274,
      "grad_norm": 0.8606549501419067,
      "learning_rate": 0.0001927219823433106,
      "loss": 0.9936,
      "step": 4680
    },
    {
      "epoch": 0.14930588646399187,
      "grad_norm": 0.9188569784164429,
      "learning_rate": 0.00019264471910554183,
      "loss": 0.9833,
      "step": 4700
    },
    {
      "epoch": 0.14994123066171097,
      "grad_norm": 0.7773941159248352,
      "learning_rate": 0.0001925670635727035,
      "loss": 0.9272,
      "step": 4720
    },
    {
      "epoch": 0.1505765748594301,
      "grad_norm": 0.8689327836036682,
      "learning_rate": 0.00019248901607362047,
      "loss": 0.9462,
      "step": 4740
    },
    {
      "epoch": 0.1512119190571492,
      "grad_norm": 0.800255298614502,
      "learning_rate": 0.00019241057693877725,
      "loss": 0.9222,
      "step": 4760
    },
    {
      "epoch": 0.15184726325486833,
      "grad_norm": 0.9326597452163696,
      "learning_rate": 0.0001923317465003168,
      "loss": 0.961,
      "step": 4780
    },
    {
      "epoch": 0.15248260745258743,
      "grad_norm": 1.072416067123413,
      "learning_rate": 0.00019225252509203888,
      "loss": 0.9464,
      "step": 4800
    },
    {
      "epoch": 0.15311795165030656,
      "grad_norm": 0.9187152981758118,
      "learning_rate": 0.0001921729130493989,
      "loss": 0.9461,
      "step": 4820
    },
    {
      "epoch": 0.15375329584802566,
      "grad_norm": 0.8737976551055908,
      "learning_rate": 0.00019209291070950633,
      "loss": 0.8771,
      "step": 4840
    },
    {
      "epoch": 0.1543886400457448,
      "grad_norm": 0.9321054220199585,
      "learning_rate": 0.0001920125184111233,
      "loss": 0.9179,
      "step": 4860
    },
    {
      "epoch": 0.1550239842434639,
      "grad_norm": 0.7673978209495544,
      "learning_rate": 0.00019193173649466322,
      "loss": 0.8711,
      "step": 4880
    },
    {
      "epoch": 0.15565932844118302,
      "grad_norm": 1.0326552391052246,
      "learning_rate": 0.00019185056530218923,
      "loss": 0.9494,
      "step": 4900
    },
    {
      "epoch": 0.15629467263890212,
      "grad_norm": 0.8184536695480347,
      "learning_rate": 0.0001917690051774129,
      "loss": 0.9201,
      "step": 4920
    },
    {
      "epoch": 0.15693001683662125,
      "grad_norm": 0.8319898247718811,
      "learning_rate": 0.0001916870564656926,
      "loss": 0.9167,
      "step": 4940
    },
    {
      "epoch": 0.15756536103434035,
      "grad_norm": 1.0563160181045532,
      "learning_rate": 0.0001916047195140323,
      "loss": 0.8993,
      "step": 4960
    },
    {
      "epoch": 0.15820070523205948,
      "grad_norm": 0.8466194868087769,
      "learning_rate": 0.00019152199467107974,
      "loss": 0.9198,
      "step": 4980
    },
    {
      "epoch": 0.15883604942977858,
      "grad_norm": 1.1115593910217285,
      "learning_rate": 0.00019143888228712527,
      "loss": 0.8749,
      "step": 5000
    },
    {
      "epoch": 0.15883604942977858,
      "eval_loss": 0.8843944668769836,
      "eval_runtime": 127.8707,
      "eval_samples_per_second": 21.139,
      "eval_steps_per_second": 10.573,
      "step": 5000
    },
    {
      "epoch": 0.1594713936274977,
      "grad_norm": 0.9679493308067322,
      "learning_rate": 0.00019135538271410022,
      "loss": 0.9212,
      "step": 5020
    },
    {
      "epoch": 0.1601067378252168,
      "grad_norm": 0.8485816121101379,
      "learning_rate": 0.0001912714963055754,
      "loss": 0.9054,
      "step": 5040
    },
    {
      "epoch": 0.16074208202293594,
      "grad_norm": 1.0210843086242676,
      "learning_rate": 0.0001911872234167597,
      "loss": 0.917,
      "step": 5060
    },
    {
      "epoch": 0.16137742622065504,
      "grad_norm": 1.0072481632232666,
      "learning_rate": 0.00019110256440449844,
      "loss": 0.9014,
      "step": 5080
    },
    {
      "epoch": 0.16201277041837414,
      "grad_norm": 0.9833612442016602,
      "learning_rate": 0.00019101751962727204,
      "loss": 0.891,
      "step": 5100
    },
    {
      "epoch": 0.16264811461609327,
      "grad_norm": 1.0564861297607422,
      "learning_rate": 0.0001909320894451943,
      "loss": 0.8581,
      "step": 5120
    },
    {
      "epoch": 0.16328345881381237,
      "grad_norm": 1.1205075979232788,
      "learning_rate": 0.0001908462742200111,
      "loss": 0.8884,
      "step": 5140
    },
    {
      "epoch": 0.1639188030115315,
      "grad_norm": 0.9841699004173279,
      "learning_rate": 0.0001907600743150986,
      "loss": 0.8815,
      "step": 5160
    },
    {
      "epoch": 0.1645541472092506,
      "grad_norm": 0.852820098400116,
      "learning_rate": 0.00019067349009546197,
      "loss": 0.8594,
      "step": 5180
    },
    {
      "epoch": 0.16518949140696973,
      "grad_norm": 0.8630360960960388,
      "learning_rate": 0.00019058652192773372,
      "loss": 0.8653,
      "step": 5200
    },
    {
      "epoch": 0.16582483560468883,
      "grad_norm": 1.0112591981887817,
      "learning_rate": 0.00019049917018017207,
      "loss": 0.8715,
      "step": 5220
    },
    {
      "epoch": 0.16646017980240796,
      "grad_norm": 0.9182717204093933,
      "learning_rate": 0.00019041143522265948,
      "loss": 0.8875,
      "step": 5240
    },
    {
      "epoch": 0.16709552400012706,
      "grad_norm": 1.190596103668213,
      "learning_rate": 0.0001903233174267012,
      "loss": 0.9027,
      "step": 5260
    },
    {
      "epoch": 0.1677308681978462,
      "grad_norm": 0.8345910310745239,
      "learning_rate": 0.00019023481716542342,
      "loss": 0.8819,
      "step": 5280
    },
    {
      "epoch": 0.1683662123955653,
      "grad_norm": 0.8964826464653015,
      "learning_rate": 0.00019014593481357192,
      "loss": 0.845,
      "step": 5300
    },
    {
      "epoch": 0.16900155659328442,
      "grad_norm": 1.1423965692520142,
      "learning_rate": 0.0001900566707475104,
      "loss": 0.8463,
      "step": 5320
    },
    {
      "epoch": 0.16963690079100352,
      "grad_norm": 0.895899772644043,
      "learning_rate": 0.00018996702534521888,
      "loss": 0.8631,
      "step": 5340
    },
    {
      "epoch": 0.17027224498872265,
      "grad_norm": 1.0254230499267578,
      "learning_rate": 0.00018987699898629208,
      "loss": 0.8489,
      "step": 5360
    },
    {
      "epoch": 0.17090758918644175,
      "grad_norm": 0.9370276927947998,
      "learning_rate": 0.00018978659205193794,
      "loss": 0.8822,
      "step": 5380
    },
    {
      "epoch": 0.17154293338416088,
      "grad_norm": 1.1030024290084839,
      "learning_rate": 0.00018969580492497577,
      "loss": 0.8834,
      "step": 5400
    },
    {
      "epoch": 0.17217827758187998,
      "grad_norm": 0.9148856997489929,
      "learning_rate": 0.00018960463798983494,
      "loss": 0.8198,
      "step": 5420
    },
    {
      "epoch": 0.1728136217795991,
      "grad_norm": 0.8851357102394104,
      "learning_rate": 0.00018951309163255288,
      "loss": 0.8077,
      "step": 5440
    },
    {
      "epoch": 0.1734489659773182,
      "grad_norm": 0.9701651334762573,
      "learning_rate": 0.00018942116624077386,
      "loss": 0.8687,
      "step": 5460
    },
    {
      "epoch": 0.17408431017503734,
      "grad_norm": 0.9508700966835022,
      "learning_rate": 0.00018932886220374696,
      "loss": 0.8764,
      "step": 5480
    },
    {
      "epoch": 0.17471965437275644,
      "grad_norm": 0.9914870858192444,
      "learning_rate": 0.00018923617991232466,
      "loss": 0.8157,
      "step": 5500
    },
    {
      "epoch": 0.17535499857047557,
      "grad_norm": 1.010511040687561,
      "learning_rate": 0.00018914311975896117,
      "loss": 0.839,
      "step": 5520
    },
    {
      "epoch": 0.17599034276819467,
      "grad_norm": 0.8063015937805176,
      "learning_rate": 0.00018904968213771065,
      "loss": 0.8308,
      "step": 5540
    },
    {
      "epoch": 0.1766256869659138,
      "grad_norm": 0.8653827905654907,
      "learning_rate": 0.00018895586744422564,
      "loss": 0.8304,
      "step": 5560
    },
    {
      "epoch": 0.1772610311636329,
      "grad_norm": 1.0596357583999634,
      "learning_rate": 0.00018886167607575532,
      "loss": 0.8346,
      "step": 5580
    },
    {
      "epoch": 0.177896375361352,
      "grad_norm": 1.0251786708831787,
      "learning_rate": 0.00018876710843114398,
      "loss": 0.8639,
      "step": 5600
    },
    {
      "epoch": 0.17853171955907113,
      "grad_norm": 0.8897235989570618,
      "learning_rate": 0.00018867216491082905,
      "loss": 0.8286,
      "step": 5620
    },
    {
      "epoch": 0.17916706375679023,
      "grad_norm": 0.8118072748184204,
      "learning_rate": 0.00018857684591683967,
      "loss": 0.8597,
      "step": 5640
    },
    {
      "epoch": 0.17980240795450936,
      "grad_norm": 0.8698698878288269,
      "learning_rate": 0.0001884811518527949,
      "loss": 0.7894,
      "step": 5660
    },
    {
      "epoch": 0.18043775215222846,
      "grad_norm": 0.8228470087051392,
      "learning_rate": 0.00018838508312390192,
      "loss": 0.8302,
      "step": 5680
    },
    {
      "epoch": 0.1810730963499476,
      "grad_norm": 1.1411319971084595,
      "learning_rate": 0.00018828864013695448,
      "loss": 0.8313,
      "step": 5700
    },
    {
      "epoch": 0.1817084405476667,
      "grad_norm": 0.8076447248458862,
      "learning_rate": 0.00018819182330033103,
      "loss": 0.798,
      "step": 5720
    },
    {
      "epoch": 0.18234378474538582,
      "grad_norm": 0.8669622540473938,
      "learning_rate": 0.00018809463302399304,
      "loss": 0.7911,
      "step": 5740
    },
    {
      "epoch": 0.18297912894310492,
      "grad_norm": 0.8435181975364685,
      "learning_rate": 0.0001879970697194833,
      "loss": 0.7951,
      "step": 5760
    },
    {
      "epoch": 0.18361447314082405,
      "grad_norm": 1.1023324728012085,
      "learning_rate": 0.00018789913379992418,
      "loss": 0.8253,
      "step": 5780
    },
    {
      "epoch": 0.18424981733854315,
      "grad_norm": 0.9319256544113159,
      "learning_rate": 0.00018780082568001585,
      "loss": 0.7625,
      "step": 5800
    },
    {
      "epoch": 0.18488516153626228,
      "grad_norm": 0.8259923458099365,
      "learning_rate": 0.00018770214577603443,
      "loss": 0.8079,
      "step": 5820
    },
    {
      "epoch": 0.18552050573398138,
      "grad_norm": 0.8953514695167542,
      "learning_rate": 0.00018760309450583043,
      "loss": 0.7647,
      "step": 5840
    },
    {
      "epoch": 0.1861558499317005,
      "grad_norm": 0.8347587585449219,
      "learning_rate": 0.00018750367228882685,
      "loss": 0.8089,
      "step": 5860
    },
    {
      "epoch": 0.1867911941294196,
      "grad_norm": 0.9788545966148376,
      "learning_rate": 0.00018740387954601742,
      "loss": 0.7737,
      "step": 5880
    },
    {
      "epoch": 0.18742653832713874,
      "grad_norm": 0.9509750008583069,
      "learning_rate": 0.00018730371669996478,
      "loss": 0.8073,
      "step": 5900
    },
    {
      "epoch": 0.18806188252485784,
      "grad_norm": 0.9388551115989685,
      "learning_rate": 0.0001872031841747988,
      "loss": 0.7585,
      "step": 5920
    },
    {
      "epoch": 0.18869722672257697,
      "grad_norm": 0.8342726826667786,
      "learning_rate": 0.00018710228239621476,
      "loss": 0.8025,
      "step": 5940
    },
    {
      "epoch": 0.18933257092029607,
      "grad_norm": 1.0455151796340942,
      "learning_rate": 0.00018700101179147134,
      "loss": 0.7603,
      "step": 5960
    },
    {
      "epoch": 0.1899679151180152,
      "grad_norm": 0.820931077003479,
      "learning_rate": 0.00018689937278938915,
      "loss": 0.7972,
      "step": 5980
    },
    {
      "epoch": 0.1906032593157343,
      "grad_norm": 0.8494334816932678,
      "learning_rate": 0.00018679736582034867,
      "loss": 0.7663,
      "step": 6000
    },
    {
      "epoch": 0.1906032593157343,
      "eval_loss": 0.7605160474777222,
      "eval_runtime": 45.0866,
      "eval_samples_per_second": 59.951,
      "eval_steps_per_second": 29.987,
      "step": 6000
    },
    {
      "epoch": 0.19123860351345343,
      "grad_norm": 0.9915199279785156,
      "learning_rate": 0.00018669499131628847,
      "loss": 0.7911,
      "step": 6020
    },
    {
      "epoch": 0.19187394771117253,
      "grad_norm": 1.009752869606018,
      "learning_rate": 0.00018659739550293418,
      "loss": 0.7791,
      "step": 6040
    },
    {
      "epoch": 0.19250929190889166,
      "grad_norm": 1.008296012878418,
      "learning_rate": 0.00018649430555384115,
      "loss": 0.7741,
      "step": 6060
    },
    {
      "epoch": 0.19314463610661076,
      "grad_norm": 0.9730678200721741,
      "learning_rate": 0.0001863908493530077,
      "loss": 0.8028,
      "step": 6080
    },
    {
      "epoch": 0.19377998030432986,
      "grad_norm": 0.8386117815971375,
      "learning_rate": 0.0001862870273385091,
      "loss": 0.789,
      "step": 6100
    },
    {
      "epoch": 0.194415324502049,
      "grad_norm": 0.8517867922782898,
      "learning_rate": 0.00018618283994996954,
      "loss": 0.7472,
      "step": 6120
    },
    {
      "epoch": 0.1950506686997681,
      "grad_norm": 0.8791770339012146,
      "learning_rate": 0.00018607828762856046,
      "loss": 0.7871,
      "step": 6140
    },
    {
      "epoch": 0.19568601289748722,
      "grad_norm": 0.9248822331428528,
      "learning_rate": 0.00018597337081699848,
      "loss": 0.762,
      "step": 6160
    },
    {
      "epoch": 0.19632135709520632,
      "grad_norm": 0.8059686422348022,
      "learning_rate": 0.00018586808995954367,
      "loss": 0.7345,
      "step": 6180
    },
    {
      "epoch": 0.19695670129292545,
      "grad_norm": 0.7610188126564026,
      "learning_rate": 0.00018576244550199758,
      "loss": 0.7478,
      "step": 6200
    },
    {
      "epoch": 0.19759204549064455,
      "grad_norm": 0.7763079404830933,
      "learning_rate": 0.00018565643789170144,
      "loss": 0.7552,
      "step": 6220
    },
    {
      "epoch": 0.19822738968836368,
      "grad_norm": 1.1734811067581177,
      "learning_rate": 0.00018555006757753418,
      "loss": 0.7645,
      "step": 6240
    },
    {
      "epoch": 0.19886273388608278,
      "grad_norm": 0.7641186714172363,
      "learning_rate": 0.00018544333500991053,
      "loss": 0.7267,
      "step": 6260
    },
    {
      "epoch": 0.1994980780838019,
      "grad_norm": 0.8322380781173706,
      "learning_rate": 0.00018533624064077922,
      "loss": 0.7601,
      "step": 6280
    },
    {
      "epoch": 0.200133422281521,
      "grad_norm": 0.9059064388275146,
      "learning_rate": 0.00018522878492362096,
      "loss": 0.7716,
      "step": 6300
    },
    {
      "epoch": 0.20076876647924013,
      "grad_norm": 0.7728195786476135,
      "learning_rate": 0.00018512096831344653,
      "loss": 0.7435,
      "step": 6320
    },
    {
      "epoch": 0.20140411067695924,
      "grad_norm": 0.9880885481834412,
      "learning_rate": 0.00018501279126679495,
      "loss": 0.7378,
      "step": 6340
    },
    {
      "epoch": 0.20203945487467836,
      "grad_norm": 0.8192346096038818,
      "learning_rate": 0.00018490425424173138,
      "loss": 0.7376,
      "step": 6360
    },
    {
      "epoch": 0.20267479907239747,
      "grad_norm": 1.175627589225769,
      "learning_rate": 0.0001847953576978453,
      "loss": 0.7672,
      "step": 6380
    },
    {
      "epoch": 0.2033101432701166,
      "grad_norm": 0.7959802746772766,
      "learning_rate": 0.0001846861020962486,
      "loss": 0.7331,
      "step": 6400
    },
    {
      "epoch": 0.2039454874678357,
      "grad_norm": 0.8343777060508728,
      "learning_rate": 0.0001845764878995735,
      "loss": 0.7142,
      "step": 6420
    },
    {
      "epoch": 0.20458083166555482,
      "grad_norm": 0.9900172352790833,
      "learning_rate": 0.00018446651557197066,
      "loss": 0.7819,
      "step": 6440
    },
    {
      "epoch": 0.20521617586327393,
      "grad_norm": 1.111018180847168,
      "learning_rate": 0.00018435618557910725,
      "loss": 0.7226,
      "step": 6460
    },
    {
      "epoch": 0.20585152006099305,
      "grad_norm": 0.9301121830940247,
      "learning_rate": 0.00018424549838816492,
      "loss": 0.7295,
      "step": 6480
    },
    {
      "epoch": 0.20648686425871215,
      "grad_norm": 0.894797146320343,
      "learning_rate": 0.0001841344544678378,
      "loss": 0.7199,
      "step": 6500
    },
    {
      "epoch": 0.20712220845643128,
      "grad_norm": 1.041779637336731,
      "learning_rate": 0.0001840230542883306,
      "loss": 0.7213,
      "step": 6520
    },
    {
      "epoch": 0.20775755265415038,
      "grad_norm": 0.9267428517341614,
      "learning_rate": 0.00018391129832135659,
      "loss": 0.7463,
      "step": 6540
    },
    {
      "epoch": 0.2083928968518695,
      "grad_norm": 0.8043299913406372,
      "learning_rate": 0.00018379918704013556,
      "loss": 0.6909,
      "step": 6560
    },
    {
      "epoch": 0.20902824104958861,
      "grad_norm": 0.8037667870521545,
      "learning_rate": 0.0001836867209193918,
      "loss": 0.7307,
      "step": 6580
    },
    {
      "epoch": 0.20966358524730772,
      "grad_norm": 0.9795257449150085,
      "learning_rate": 0.00018357390043535228,
      "loss": 0.7625,
      "step": 6600
    },
    {
      "epoch": 0.21029892944502684,
      "grad_norm": 1.0763206481933594,
      "learning_rate": 0.0001834607260657443,
      "loss": 0.7457,
      "step": 6620
    },
    {
      "epoch": 0.21093427364274595,
      "grad_norm": 0.8083770275115967,
      "learning_rate": 0.00018334719828979373,
      "loss": 0.7398,
      "step": 6640
    },
    {
      "epoch": 0.21156961784046507,
      "grad_norm": 0.8648799657821655,
      "learning_rate": 0.00018323331758822299,
      "loss": 0.7392,
      "step": 6660
    },
    {
      "epoch": 0.21220496203818418,
      "grad_norm": 1.322874903678894,
      "learning_rate": 0.0001831190844432488,
      "loss": 0.767,
      "step": 6680
    },
    {
      "epoch": 0.2128403062359033,
      "grad_norm": 0.8415853977203369,
      "learning_rate": 0.00018300449933858034,
      "loss": 0.7123,
      "step": 6700
    },
    {
      "epoch": 0.2134756504336224,
      "grad_norm": 0.8832991123199463,
      "learning_rate": 0.00018288956275941713,
      "loss": 0.7329,
      "step": 6720
    },
    {
      "epoch": 0.21411099463134153,
      "grad_norm": 0.8079715967178345,
      "learning_rate": 0.00018277427519244692,
      "loss": 0.6988,
      "step": 6740
    },
    {
      "epoch": 0.21474633882906063,
      "grad_norm": 0.9029518365859985,
      "learning_rate": 0.00018265863712584377,
      "loss": 0.6943,
      "step": 6760
    },
    {
      "epoch": 0.21538168302677976,
      "grad_norm": 0.9082062244415283,
      "learning_rate": 0.0001825426490492658,
      "loss": 0.7517,
      "step": 6780
    },
    {
      "epoch": 0.21601702722449886,
      "grad_norm": 0.9031996726989746,
      "learning_rate": 0.00018242631145385329,
      "loss": 0.7108,
      "step": 6800
    },
    {
      "epoch": 0.216652371422218,
      "grad_norm": 0.9114848375320435,
      "learning_rate": 0.00018230962483222648,
      "loss": 0.7151,
      "step": 6820
    },
    {
      "epoch": 0.2172877156199371,
      "grad_norm": 0.8056477308273315,
      "learning_rate": 0.00018219258967848355,
      "loss": 0.7154,
      "step": 6840
    },
    {
      "epoch": 0.21792305981765622,
      "grad_norm": 0.9029595255851746,
      "learning_rate": 0.0001820752064881985,
      "loss": 0.728,
      "step": 6860
    },
    {
      "epoch": 0.21855840401537532,
      "grad_norm": 0.9304366707801819,
      "learning_rate": 0.00018195747575841905,
      "loss": 0.7298,
      "step": 6880
    },
    {
      "epoch": 0.21919374821309445,
      "grad_norm": 1.2549713850021362,
      "learning_rate": 0.00018183939798766452,
      "loss": 0.7166,
      "step": 6900
    },
    {
      "epoch": 0.21982909241081355,
      "grad_norm": 0.8609549403190613,
      "learning_rate": 0.0001817209736759238,
      "loss": 0.7222,
      "step": 6920
    },
    {
      "epoch": 0.22046443660853268,
      "grad_norm": 0.9668901562690735,
      "learning_rate": 0.00018160220332465315,
      "loss": 0.706,
      "step": 6940
    },
    {
      "epoch": 0.22109978080625178,
      "grad_norm": 0.9426187872886658,
      "learning_rate": 0.00018148308743677407,
      "loss": 0.7549,
      "step": 6960
    },
    {
      "epoch": 0.2217351250039709,
      "grad_norm": 1.0274590253829956,
      "learning_rate": 0.00018136362651667123,
      "loss": 0.7118,
      "step": 6980
    },
    {
      "epoch": 0.22237046920169,
      "grad_norm": 1.0056123733520508,
      "learning_rate": 0.00018124382107019028,
      "loss": 0.7284,
      "step": 7000
    },
    {
      "epoch": 0.22237046920169,
      "eval_loss": 0.6820850968360901,
      "eval_runtime": 44.1137,
      "eval_samples_per_second": 61.274,
      "eval_steps_per_second": 30.648,
      "step": 7000
    },
    {
      "epoch": 0.22300581339940914,
      "grad_norm": 1.01372492313385,
      "learning_rate": 0.0001811236716046358,
      "loss": 0.7306,
      "step": 7020
    },
    {
      "epoch": 0.22364115759712824,
      "grad_norm": 0.8217781782150269,
      "learning_rate": 0.000181003178628769,
      "loss": 0.7216,
      "step": 7040
    },
    {
      "epoch": 0.22427650179484737,
      "grad_norm": 0.9484082460403442,
      "learning_rate": 0.00018088234265280573,
      "loss": 0.7164,
      "step": 7060
    },
    {
      "epoch": 0.22491184599256647,
      "grad_norm": 1.2144994735717773,
      "learning_rate": 0.0001807672312378185,
      "loss": 0.7248,
      "step": 7080
    },
    {
      "epoch": 0.22554719019028557,
      "grad_norm": 0.9574259519577026,
      "learning_rate": 0.00018064572788467363,
      "loss": 0.689,
      "step": 7100
    },
    {
      "epoch": 0.2261825343880047,
      "grad_norm": 0.7626876831054688,
      "learning_rate": 0.00018052998338935085,
      "loss": 0.748,
      "step": 7120
    },
    {
      "epoch": 0.2268178785857238,
      "grad_norm": 0.8534376621246338,
      "learning_rate": 0.00018040781461538648,
      "loss": 0.6947,
      "step": 7140
    },
    {
      "epoch": 0.22745322278344293,
      "grad_norm": 1.0029544830322266,
      "learning_rate": 0.00018028530536233676,
      "loss": 0.7319,
      "step": 7160
    },
    {
      "epoch": 0.22808856698116203,
      "grad_norm": 0.925713300704956,
      "learning_rate": 0.00018016245614895518,
      "loss": 0.7092,
      "step": 7180
    },
    {
      "epoch": 0.22872391117888116,
      "grad_norm": 0.8006899952888489,
      "learning_rate": 0.00018003926749543488,
      "loss": 0.6879,
      "step": 7200
    },
    {
      "epoch": 0.22935925537660026,
      "grad_norm": 0.8886255025863647,
      "learning_rate": 0.00017991573992340616,
      "loss": 0.6784,
      "step": 7220
    },
    {
      "epoch": 0.2299945995743194,
      "grad_norm": 0.8108293414115906,
      "learning_rate": 0.00017979187395593459,
      "loss": 0.7094,
      "step": 7240
    },
    {
      "epoch": 0.2306299437720385,
      "grad_norm": 1.0475900173187256,
      "learning_rate": 0.00017966767011751858,
      "loss": 0.696,
      "step": 7260
    },
    {
      "epoch": 0.23126528796975762,
      "grad_norm": 0.9214044809341431,
      "learning_rate": 0.0001795431289340872,
      "loss": 0.7125,
      "step": 7280
    },
    {
      "epoch": 0.23190063216747672,
      "grad_norm": 0.996101975440979,
      "learning_rate": 0.00017941825093299802,
      "loss": 0.6635,
      "step": 7300
    },
    {
      "epoch": 0.23253597636519585,
      "grad_norm": 0.9577082991600037,
      "learning_rate": 0.00017929303664303482,
      "loss": 0.6753,
      "step": 7320
    },
    {
      "epoch": 0.23317132056291495,
      "grad_norm": 1.0278524160385132,
      "learning_rate": 0.00017916748659440533,
      "loss": 0.7024,
      "step": 7340
    },
    {
      "epoch": 0.23380666476063408,
      "grad_norm": 0.758007287979126,
      "learning_rate": 0.00017904160131873906,
      "loss": 0.6877,
      "step": 7360
    },
    {
      "epoch": 0.23444200895835318,
      "grad_norm": 0.8926889300346375,
      "learning_rate": 0.00017891538134908502,
      "loss": 0.7123,
      "step": 7380
    },
    {
      "epoch": 0.2350773531560723,
      "grad_norm": 0.8747749924659729,
      "learning_rate": 0.00017878882721990936,
      "loss": 0.656,
      "step": 7400
    },
    {
      "epoch": 0.2357126973537914,
      "grad_norm": 1.012324333190918,
      "learning_rate": 0.00017866193946709327,
      "loss": 0.6885,
      "step": 7420
    },
    {
      "epoch": 0.23634804155151054,
      "grad_norm": 0.7973082065582275,
      "learning_rate": 0.00017853471862793068,
      "loss": 0.6627,
      "step": 7440
    },
    {
      "epoch": 0.23698338574922964,
      "grad_norm": 0.8259735107421875,
      "learning_rate": 0.00017840716524112582,
      "loss": 0.6861,
      "step": 7460
    },
    {
      "epoch": 0.23761872994694877,
      "grad_norm": 0.7817295789718628,
      "learning_rate": 0.00017827927984679113,
      "loss": 0.6808,
      "step": 7480
    },
    {
      "epoch": 0.23825407414466787,
      "grad_norm": 0.8139945864677429,
      "learning_rate": 0.00017815106298644495,
      "loss": 0.6891,
      "step": 7500
    },
    {
      "epoch": 0.238889418342387,
      "grad_norm": 1.0507733821868896,
      "learning_rate": 0.00017802251520300906,
      "loss": 0.6936,
      "step": 7520
    },
    {
      "epoch": 0.2395247625401061,
      "grad_norm": 0.929937481880188,
      "learning_rate": 0.0001778936370408066,
      "loss": 0.687,
      "step": 7540
    },
    {
      "epoch": 0.24016010673782523,
      "grad_norm": 1.0632777214050293,
      "learning_rate": 0.00017776442904555962,
      "loss": 0.6656,
      "step": 7560
    },
    {
      "epoch": 0.24079545093554433,
      "grad_norm": 1.1247339248657227,
      "learning_rate": 0.00017763489176438686,
      "loss": 0.6645,
      "step": 7580
    },
    {
      "epoch": 0.24143079513326343,
      "grad_norm": 0.8897901773452759,
      "learning_rate": 0.00017750502574580135,
      "loss": 0.6832,
      "step": 7600
    },
    {
      "epoch": 0.24206613933098256,
      "grad_norm": 0.9285283088684082,
      "learning_rate": 0.00017737483153970816,
      "loss": 0.6841,
      "step": 7620
    },
    {
      "epoch": 0.24270148352870166,
      "grad_norm": 0.8733476400375366,
      "learning_rate": 0.00017724430969740196,
      "loss": 0.6567,
      "step": 7640
    },
    {
      "epoch": 0.2433368277264208,
      "grad_norm": 0.9532790184020996,
      "learning_rate": 0.0001771134607715649,
      "loss": 0.6795,
      "step": 7660
    },
    {
      "epoch": 0.2439721719241399,
      "grad_norm": 1.0881035327911377,
      "learning_rate": 0.00017698228531626398,
      "loss": 0.693,
      "step": 7680
    },
    {
      "epoch": 0.24460751612185902,
      "grad_norm": 1.0936851501464844,
      "learning_rate": 0.00017685078388694897,
      "loss": 0.6852,
      "step": 7700
    },
    {
      "epoch": 0.24524286031957812,
      "grad_norm": 1.0439817905426025,
      "learning_rate": 0.0001767189570404499,
      "loss": 0.6746,
      "step": 7720
    },
    {
      "epoch": 0.24587820451729725,
      "grad_norm": 0.8599082231521606,
      "learning_rate": 0.00017658680533497477,
      "loss": 0.6719,
      "step": 7740
    },
    {
      "epoch": 0.24651354871501635,
      "grad_norm": 0.9633190035820007,
      "learning_rate": 0.00017645432933010712,
      "loss": 0.7091,
      "step": 7760
    },
    {
      "epoch": 0.24714889291273548,
      "grad_norm": 0.8989465236663818,
      "learning_rate": 0.00017632152958680378,
      "loss": 0.6649,
      "step": 7780
    },
    {
      "epoch": 0.24778423711045458,
      "grad_norm": 0.8468721508979797,
      "learning_rate": 0.00017618840666739228,
      "loss": 0.6789,
      "step": 7800
    },
    {
      "epoch": 0.2484195813081737,
      "grad_norm": 0.8482181429862976,
      "learning_rate": 0.00017605496113556882,
      "loss": 0.6902,
      "step": 7820
    },
    {
      "epoch": 0.2490549255058928,
      "grad_norm": 0.8012595176696777,
      "learning_rate": 0.00017592119355639544,
      "loss": 0.6733,
      "step": 7840
    },
    {
      "epoch": 0.24969026970361194,
      "grad_norm": 0.8117650151252747,
      "learning_rate": 0.00017578710449629804,
      "loss": 0.6916,
      "step": 7860
    },
    {
      "epoch": 0.25032561390133107,
      "grad_norm": 0.9711939096450806,
      "learning_rate": 0.00017565269452306364,
      "loss": 0.6701,
      "step": 7880
    },
    {
      "epoch": 0.25096095809905017,
      "grad_norm": 0.8234876394271851,
      "learning_rate": 0.00017551796420583833,
      "loss": 0.62,
      "step": 7900
    },
    {
      "epoch": 0.25159630229676927,
      "grad_norm": 0.8263707756996155,
      "learning_rate": 0.00017538967420545803,
      "loss": 0.6907,
      "step": 7920
    },
    {
      "epoch": 0.25223164649448837,
      "grad_norm": 1.2548505067825317,
      "learning_rate": 0.00017525432085959138,
      "loss": 0.6644,
      "step": 7940
    },
    {
      "epoch": 0.2528669906922075,
      "grad_norm": 1.1948567628860474,
      "learning_rate": 0.00017511864885660835,
      "loss": 0.6609,
      "step": 7960
    },
    {
      "epoch": 0.25350233488992663,
      "grad_norm": 0.9310169219970703,
      "learning_rate": 0.0001749826587709989,
      "loss": 0.6757,
      "step": 7980
    },
    {
      "epoch": 0.25413767908764573,
      "grad_norm": 0.8832531571388245,
      "learning_rate": 0.00017484635117859983,
      "loss": 0.6552,
      "step": 8000
    },
    {
      "epoch": 0.25413767908764573,
      "eval_loss": 0.6333429217338562,
      "eval_runtime": 44.406,
      "eval_samples_per_second": 60.87,
      "eval_steps_per_second": 30.446,
      "step": 8000
    },
    {
      "epoch": 0.25477302328536483,
      "grad_norm": 0.7624004483222961,
      "learning_rate": 0.00017470972665659245,
      "loss": 0.6567,
      "step": 8020
    },
    {
      "epoch": 0.255408367483084,
      "grad_norm": 0.9134401082992554,
      "learning_rate": 0.00017457278578350002,
      "loss": 0.6681,
      "step": 8040
    },
    {
      "epoch": 0.2560437116808031,
      "grad_norm": 0.9597674608230591,
      "learning_rate": 0.00017443552913918534,
      "loss": 0.6818,
      "step": 8060
    },
    {
      "epoch": 0.2566790558785222,
      "grad_norm": 0.961934506893158,
      "learning_rate": 0.00017429795730484836,
      "loss": 0.6833,
      "step": 8080
    },
    {
      "epoch": 0.2573144000762413,
      "grad_norm": 0.9118033647537231,
      "learning_rate": 0.00017416007086302367,
      "loss": 0.6607,
      "step": 8100
    },
    {
      "epoch": 0.2579497442739604,
      "grad_norm": 0.8447214961051941,
      "learning_rate": 0.00017402187039757805,
      "loss": 0.6409,
      "step": 8120
    },
    {
      "epoch": 0.25858508847167955,
      "grad_norm": 1.010040044784546,
      "learning_rate": 0.0001738833564937079,
      "loss": 0.6761,
      "step": 8140
    },
    {
      "epoch": 0.25922043266939865,
      "grad_norm": 0.8686466217041016,
      "learning_rate": 0.00017374452973793693,
      "loss": 0.6575,
      "step": 8160
    },
    {
      "epoch": 0.25985577686711775,
      "grad_norm": 1.0445839166641235,
      "learning_rate": 0.00017360539071811356,
      "loss": 0.667,
      "step": 8180
    },
    {
      "epoch": 0.26049112106483685,
      "grad_norm": 1.1015607118606567,
      "learning_rate": 0.00017346594002340843,
      "loss": 0.6468,
      "step": 8200
    },
    {
      "epoch": 0.261126465262556,
      "grad_norm": 1.4550483226776123,
      "learning_rate": 0.00017332617824431204,
      "loss": 0.6642,
      "step": 8220
    },
    {
      "epoch": 0.2617618094602751,
      "grad_norm": 0.8968580961227417,
      "learning_rate": 0.000173186105972632,
      "loss": 0.6695,
      "step": 8240
    },
    {
      "epoch": 0.2623971536579942,
      "grad_norm": 0.9802786111831665,
      "learning_rate": 0.00017304572380149078,
      "loss": 0.6516,
      "step": 8260
    },
    {
      "epoch": 0.2630324978557133,
      "grad_norm": 0.8785617351531982,
      "learning_rate": 0.00017290503232532305,
      "loss": 0.6857,
      "step": 8280
    },
    {
      "epoch": 0.26366784205343247,
      "grad_norm": 0.8675135970115662,
      "learning_rate": 0.00017276403213987323,
      "loss": 0.6493,
      "step": 8300
    },
    {
      "epoch": 0.26430318625115157,
      "grad_norm": 0.8159687519073486,
      "learning_rate": 0.0001726227238421929,
      "loss": 0.6445,
      "step": 8320
    },
    {
      "epoch": 0.26493853044887067,
      "grad_norm": 0.8598359823226929,
      "learning_rate": 0.00017248110803063833,
      "loss": 0.6515,
      "step": 8340
    },
    {
      "epoch": 0.26557387464658977,
      "grad_norm": 1.0304324626922607,
      "learning_rate": 0.00017233918530486792,
      "loss": 0.6431,
      "step": 8360
    },
    {
      "epoch": 0.2662092188443089,
      "grad_norm": 0.933110773563385,
      "learning_rate": 0.0001722040749834389,
      "loss": 0.6958,
      "step": 8380
    },
    {
      "epoch": 0.266844563042028,
      "grad_norm": 0.9690568447113037,
      "learning_rate": 0.0001720615555046345,
      "loss": 0.5922,
      "step": 8400
    },
    {
      "epoch": 0.26747990723974713,
      "grad_norm": 0.9293822646141052,
      "learning_rate": 0.0001719187308881687,
      "loss": 0.6407,
      "step": 8420
    },
    {
      "epoch": 0.26811525143746623,
      "grad_norm": 0.8957870602607727,
      "learning_rate": 0.00017177560173881846,
      "loss": 0.662,
      "step": 8440
    },
    {
      "epoch": 0.2687505956351854,
      "grad_norm": 1.0288225412368774,
      "learning_rate": 0.0001716321686626503,
      "loss": 0.6395,
      "step": 8460
    },
    {
      "epoch": 0.2693859398329045,
      "grad_norm": 0.838657021522522,
      "learning_rate": 0.00017148843226701764,
      "loss": 0.6313,
      "step": 8480
    },
    {
      "epoch": 0.2700212840306236,
      "grad_norm": 0.8575971722602844,
      "learning_rate": 0.00017134439316055834,
      "loss": 0.6655,
      "step": 8500
    },
    {
      "epoch": 0.2706566282283427,
      "grad_norm": 0.9840354919433594,
      "learning_rate": 0.00017120005195319195,
      "loss": 0.6646,
      "step": 8520
    },
    {
      "epoch": 0.27129197242606184,
      "grad_norm": 0.8279704451560974,
      "learning_rate": 0.00017105540925611737,
      "loss": 0.6259,
      "step": 8540
    },
    {
      "epoch": 0.27192731662378095,
      "grad_norm": 1.0609900951385498,
      "learning_rate": 0.00017091046568180996,
      "loss": 0.6561,
      "step": 8560
    },
    {
      "epoch": 0.27256266082150005,
      "grad_norm": 0.890514612197876,
      "learning_rate": 0.0001707652218440193,
      "loss": 0.6324,
      "step": 8580
    },
    {
      "epoch": 0.27319800501921915,
      "grad_norm": 0.9357948303222656,
      "learning_rate": 0.0001706196783577663,
      "loss": 0.6116,
      "step": 8600
    },
    {
      "epoch": 0.27383334921693825,
      "grad_norm": 0.9577456116676331,
      "learning_rate": 0.0001704738358393407,
      "loss": 0.6764,
      "step": 8620
    },
    {
      "epoch": 0.2744686934146574,
      "grad_norm": 0.834900438785553,
      "learning_rate": 0.0001703276949062985,
      "loss": 0.6324,
      "step": 8640
    },
    {
      "epoch": 0.2751040376123765,
      "grad_norm": 0.8283354043960571,
      "learning_rate": 0.00017018125617745933,
      "loss": 0.6187,
      "step": 8660
    },
    {
      "epoch": 0.2757393818100956,
      "grad_norm": 0.854200541973114,
      "learning_rate": 0.00017003452027290373,
      "loss": 0.6294,
      "step": 8680
    },
    {
      "epoch": 0.2763747260078147,
      "grad_norm": 0.8695046901702881,
      "learning_rate": 0.00016988748781397064,
      "loss": 0.6377,
      "step": 8700
    },
    {
      "epoch": 0.27701007020553386,
      "grad_norm": 0.7802212238311768,
      "learning_rate": 0.00016974015942325475,
      "loss": 0.6051,
      "step": 8720
    },
    {
      "epoch": 0.27764541440325297,
      "grad_norm": 1.0842890739440918,
      "learning_rate": 0.00016959253572460382,
      "loss": 0.6352,
      "step": 8740
    },
    {
      "epoch": 0.27828075860097207,
      "grad_norm": 0.8472367525100708,
      "learning_rate": 0.0001694446173431161,
      "loss": 0.5907,
      "step": 8760
    },
    {
      "epoch": 0.27891610279869117,
      "grad_norm": 0.8548029661178589,
      "learning_rate": 0.0001692964049051376,
      "loss": 0.6434,
      "step": 8780
    },
    {
      "epoch": 0.2795514469964103,
      "grad_norm": 0.9771581888198853,
      "learning_rate": 0.00016914789903825945,
      "loss": 0.6381,
      "step": 8800
    },
    {
      "epoch": 0.2801867911941294,
      "grad_norm": 0.9199798703193665,
      "learning_rate": 0.0001689991003713154,
      "loss": 0.6589,
      "step": 8820
    },
    {
      "epoch": 0.2808221353918485,
      "grad_norm": 1.0753369331359863,
      "learning_rate": 0.00016885000953437894,
      "loss": 0.6413,
      "step": 8840
    },
    {
      "epoch": 0.2814574795895676,
      "grad_norm": 1.0925753116607666,
      "learning_rate": 0.00016870062715876075,
      "loss": 0.6234,
      "step": 8860
    },
    {
      "epoch": 0.2820928237872868,
      "grad_norm": 1.0023586750030518,
      "learning_rate": 0.00016855095387700598,
      "loss": 0.6104,
      "step": 8880
    },
    {
      "epoch": 0.2827281679850059,
      "grad_norm": 0.9077417254447937,
      "learning_rate": 0.00016840099032289162,
      "loss": 0.602,
      "step": 8900
    },
    {
      "epoch": 0.283363512182725,
      "grad_norm": 0.8238940238952637,
      "learning_rate": 0.00016825073713142374,
      "loss": 0.6157,
      "step": 8920
    },
    {
      "epoch": 0.2839988563804441,
      "grad_norm": 1.111948847770691,
      "learning_rate": 0.000168100194938835,
      "loss": 0.6092,
      "step": 8940
    },
    {
      "epoch": 0.28463420057816324,
      "grad_norm": 1.0630967617034912,
      "learning_rate": 0.0001679493643825816,
      "loss": 0.5904,
      "step": 8960
    },
    {
      "epoch": 0.28526954477588234,
      "grad_norm": 0.8827186822891235,
      "learning_rate": 0.00016779824610134092,
      "loss": 0.6166,
      "step": 8980
    },
    {
      "epoch": 0.28590488897360145,
      "grad_norm": 0.9229192137718201,
      "learning_rate": 0.00016764684073500866,
      "loss": 0.6178,
      "step": 9000
    },
    {
      "epoch": 0.28590488897360145,
      "eval_loss": 0.5966877341270447,
      "eval_runtime": 44.6044,
      "eval_samples_per_second": 60.599,
      "eval_steps_per_second": 30.311,
      "step": 9000
    },
    {
      "epoch": 0.28654023317132055,
      "grad_norm": 0.8136707544326782,
      "learning_rate": 0.00016749514892469615,
      "loss": 0.6366,
      "step": 9020
    },
    {
      "epoch": 0.2871755773690397,
      "grad_norm": 0.8175415992736816,
      "learning_rate": 0.00016734317131272762,
      "loss": 0.6177,
      "step": 9040
    },
    {
      "epoch": 0.2878109215667588,
      "grad_norm": 0.929182767868042,
      "learning_rate": 0.00016719090854263753,
      "loss": 0.646,
      "step": 9060
    },
    {
      "epoch": 0.2884462657644779,
      "grad_norm": 0.9779849052429199,
      "learning_rate": 0.0001670383612591678,
      "loss": 0.6362,
      "step": 9080
    },
    {
      "epoch": 0.289081609962197,
      "grad_norm": 0.8542407751083374,
      "learning_rate": 0.00016688553010826506,
      "loss": 0.6076,
      "step": 9100
    },
    {
      "epoch": 0.2897169541599161,
      "grad_norm": 0.8885607719421387,
      "learning_rate": 0.00016673241573707804,
      "loss": 0.6055,
      "step": 9120
    },
    {
      "epoch": 0.29035229835763526,
      "grad_norm": 0.876097559928894,
      "learning_rate": 0.0001665790187939546,
      "loss": 0.6196,
      "step": 9140
    },
    {
      "epoch": 0.29098764255535436,
      "grad_norm": 1.0198227167129517,
      "learning_rate": 0.0001664253399284393,
      "loss": 0.6374,
      "step": 9160
    },
    {
      "epoch": 0.29162298675307347,
      "grad_norm": 0.8938513994216919,
      "learning_rate": 0.00016627137979127033,
      "loss": 0.6254,
      "step": 9180
    },
    {
      "epoch": 0.29225833095079257,
      "grad_norm": 0.7427443861961365,
      "learning_rate": 0.00016611713903437692,
      "loss": 0.6099,
      "step": 9200
    },
    {
      "epoch": 0.2928936751485117,
      "grad_norm": 0.9959378242492676,
      "learning_rate": 0.00016596261831087661,
      "loss": 0.648,
      "step": 9220
    },
    {
      "epoch": 0.2935290193462308,
      "grad_norm": 1.048519253730774,
      "learning_rate": 0.00016580781827507242,
      "loss": 0.6292,
      "step": 9240
    },
    {
      "epoch": 0.2941643635439499,
      "grad_norm": 0.858858585357666,
      "learning_rate": 0.00016565273958245002,
      "loss": 0.6252,
      "step": 9260
    },
    {
      "epoch": 0.294799707741669,
      "grad_norm": 0.8437022566795349,
      "learning_rate": 0.00016549738288967514,
      "loss": 0.6188,
      "step": 9280
    },
    {
      "epoch": 0.2954350519393882,
      "grad_norm": 0.8608834743499756,
      "learning_rate": 0.00016534174885459056,
      "loss": 0.6509,
      "step": 9300
    },
    {
      "epoch": 0.2960703961371073,
      "grad_norm": 1.083897590637207,
      "learning_rate": 0.00016518583813621357,
      "loss": 0.6193,
      "step": 9320
    },
    {
      "epoch": 0.2967057403348264,
      "grad_norm": 0.9606235027313232,
      "learning_rate": 0.0001650296513947329,
      "loss": 0.6287,
      "step": 9340
    },
    {
      "epoch": 0.2973410845325455,
      "grad_norm": 1.0519804954528809,
      "learning_rate": 0.00016487318929150617,
      "loss": 0.6097,
      "step": 9360
    },
    {
      "epoch": 0.29797642873026464,
      "grad_norm": 1.3490453958511353,
      "learning_rate": 0.000164716452489057,
      "loss": 0.6043,
      "step": 9380
    },
    {
      "epoch": 0.29861177292798374,
      "grad_norm": 1.1292142868041992,
      "learning_rate": 0.00016455944165107207,
      "loss": 0.5896,
      "step": 9400
    },
    {
      "epoch": 0.29924711712570284,
      "grad_norm": 0.9570278525352478,
      "learning_rate": 0.00016440215744239865,
      "loss": 0.6087,
      "step": 9420
    },
    {
      "epoch": 0.29988246132342194,
      "grad_norm": 0.8570756316184998,
      "learning_rate": 0.00016424460052904137,
      "loss": 0.6036,
      "step": 9440
    },
    {
      "epoch": 0.3005178055211411,
      "grad_norm": 0.9214951395988464,
      "learning_rate": 0.00016408677157815974,
      "loss": 0.6519,
      "step": 9460
    },
    {
      "epoch": 0.3011531497188602,
      "grad_norm": 1.1580623388290405,
      "learning_rate": 0.00016392867125806504,
      "loss": 0.5991,
      "step": 9480
    },
    {
      "epoch": 0.3017884939165793,
      "grad_norm": 1.1025846004486084,
      "learning_rate": 0.00016377030023821782,
      "loss": 0.6416,
      "step": 9500
    },
    {
      "epoch": 0.3024238381142984,
      "grad_norm": 0.8918984532356262,
      "learning_rate": 0.00016361165918922477,
      "loss": 0.6165,
      "step": 9520
    },
    {
      "epoch": 0.30305918231201756,
      "grad_norm": 0.8747968673706055,
      "learning_rate": 0.000163452748782836,
      "loss": 0.6094,
      "step": 9540
    },
    {
      "epoch": 0.30369452650973666,
      "grad_norm": 0.7480270862579346,
      "learning_rate": 0.0001632935696919422,
      "loss": 0.5987,
      "step": 9560
    },
    {
      "epoch": 0.30432987070745576,
      "grad_norm": 0.8854328393936157,
      "learning_rate": 0.00016313412259057178,
      "loss": 0.6514,
      "step": 9580
    },
    {
      "epoch": 0.30496521490517486,
      "grad_norm": 1.0659030675888062,
      "learning_rate": 0.00016297440815388802,
      "loss": 0.5796,
      "step": 9600
    },
    {
      "epoch": 0.305600559102894,
      "grad_norm": 0.9668769240379333,
      "learning_rate": 0.00016281442705818618,
      "loss": 0.6147,
      "step": 9620
    },
    {
      "epoch": 0.3062359033006131,
      "grad_norm": 0.939028263092041,
      "learning_rate": 0.00016265417998089068,
      "loss": 0.6241,
      "step": 9640
    },
    {
      "epoch": 0.3068712474983322,
      "grad_norm": 0.8955005407333374,
      "learning_rate": 0.00016249366760055222,
      "loss": 0.5832,
      "step": 9660
    },
    {
      "epoch": 0.3075065916960513,
      "grad_norm": 0.7991370558738708,
      "learning_rate": 0.00016233289059684492,
      "loss": 0.5799,
      "step": 9680
    },
    {
      "epoch": 0.3081419358937704,
      "grad_norm": 0.8115846514701843,
      "learning_rate": 0.00016217184965056336,
      "loss": 0.6109,
      "step": 9700
    },
    {
      "epoch": 0.3087772800914896,
      "grad_norm": 0.7488042712211609,
      "learning_rate": 0.00016201054544361977,
      "loss": 0.6166,
      "step": 9720
    },
    {
      "epoch": 0.3094126242892087,
      "grad_norm": 0.8463062644004822,
      "learning_rate": 0.00016184897865904123,
      "loss": 0.5779,
      "step": 9740
    },
    {
      "epoch": 0.3100479684869278,
      "grad_norm": 1.083001732826233,
      "learning_rate": 0.00016168714998096654,
      "loss": 0.6175,
      "step": 9760
    },
    {
      "epoch": 0.3106833126846469,
      "grad_norm": 0.8545092940330505,
      "learning_rate": 0.00016152506009464357,
      "loss": 0.6104,
      "step": 9780
    },
    {
      "epoch": 0.31131865688236604,
      "grad_norm": 0.9297589063644409,
      "learning_rate": 0.00016136270968642618,
      "loss": 0.5831,
      "step": 9800
    },
    {
      "epoch": 0.31195400108008514,
      "grad_norm": 0.7775977253913879,
      "learning_rate": 0.0001612000994437714,
      "loss": 0.6001,
      "step": 9820
    },
    {
      "epoch": 0.31258934527780424,
      "grad_norm": 0.943267822265625,
      "learning_rate": 0.0001610372300552366,
      "loss": 0.6089,
      "step": 9840
    },
    {
      "epoch": 0.31322468947552334,
      "grad_norm": 0.8398995399475098,
      "learning_rate": 0.0001608741022104763,
      "loss": 0.5929,
      "step": 9860
    },
    {
      "epoch": 0.3138600336732425,
      "grad_norm": 1.0078269243240356,
      "learning_rate": 0.00016071071660023954,
      "loss": 0.6215,
      "step": 9880
    },
    {
      "epoch": 0.3144953778709616,
      "grad_norm": 0.9710105657577515,
      "learning_rate": 0.0001605470739163669,
      "loss": 0.5983,
      "step": 9900
    },
    {
      "epoch": 0.3151307220686807,
      "grad_norm": 0.8864800333976746,
      "learning_rate": 0.00016038317485178734,
      "loss": 0.5812,
      "step": 9920
    },
    {
      "epoch": 0.3157660662663998,
      "grad_norm": 0.9775105118751526,
      "learning_rate": 0.0001602190201005156,
      "loss": 0.5899,
      "step": 9940
    },
    {
      "epoch": 0.31640141046411896,
      "grad_norm": 0.8554601669311523,
      "learning_rate": 0.00016005461035764902,
      "loss": 0.5989,
      "step": 9960
    },
    {
      "epoch": 0.31703675466183806,
      "grad_norm": 0.8149896264076233,
      "learning_rate": 0.0001598899463193647,
      "loss": 0.6383,
      "step": 9980
    },
    {
      "epoch": 0.31767209885955716,
      "grad_norm": 1.1985602378845215,
      "learning_rate": 0.00015972502868291652,
      "loss": 0.604,
      "step": 10000
    },
    {
      "epoch": 0.31767209885955716,
      "eval_loss": 0.5633410811424255,
      "eval_runtime": 44.2566,
      "eval_samples_per_second": 61.076,
      "eval_steps_per_second": 30.549,
      "step": 10000
    },
    {
      "epoch": 0.31830744305727626,
      "grad_norm": 0.9848890900611877,
      "learning_rate": 0.0001595598581466322,
      "loss": 0.5741,
      "step": 10020
    },
    {
      "epoch": 0.3189427872549954,
      "grad_norm": 1.0653225183486938,
      "learning_rate": 0.00015939443540991034,
      "loss": 0.6154,
      "step": 10040
    },
    {
      "epoch": 0.3195781314527145,
      "grad_norm": 0.8440039157867432,
      "learning_rate": 0.0001592287611732175,
      "loss": 0.6077,
      "step": 10060
    },
    {
      "epoch": 0.3202134756504336,
      "grad_norm": 0.8706631660461426,
      "learning_rate": 0.00015906283613808508,
      "loss": 0.6143,
      "step": 10080
    },
    {
      "epoch": 0.3208488198481527,
      "grad_norm": 1.0338808298110962,
      "learning_rate": 0.00015889666100710659,
      "loss": 0.5697,
      "step": 10100
    },
    {
      "epoch": 0.3214841640458719,
      "grad_norm": 0.8499680757522583,
      "learning_rate": 0.00015873023648393448,
      "loss": 0.5968,
      "step": 10120
    },
    {
      "epoch": 0.322119508243591,
      "grad_norm": 1.0106873512268066,
      "learning_rate": 0.00015856356327327724,
      "loss": 0.5657,
      "step": 10140
    },
    {
      "epoch": 0.3227548524413101,
      "grad_norm": 0.9771645665168762,
      "learning_rate": 0.00015839664208089634,
      "loss": 0.5989,
      "step": 10160
    },
    {
      "epoch": 0.3233901966390292,
      "grad_norm": 0.9425153136253357,
      "learning_rate": 0.0001582294736136035,
      "loss": 0.6314,
      "step": 10180
    },
    {
      "epoch": 0.3240255408367483,
      "grad_norm": 1.1419885158538818,
      "learning_rate": 0.0001580620585792572,
      "loss": 0.6137,
      "step": 10200
    },
    {
      "epoch": 0.32466088503446744,
      "grad_norm": 0.8356417417526245,
      "learning_rate": 0.00015789439768676032,
      "loss": 0.6189,
      "step": 10220
    },
    {
      "epoch": 0.32529622923218654,
      "grad_norm": 0.9876666069030762,
      "learning_rate": 0.00015772649164605648,
      "loss": 0.6069,
      "step": 10240
    },
    {
      "epoch": 0.32593157342990564,
      "grad_norm": 1.0510075092315674,
      "learning_rate": 0.0001575583411681276,
      "loss": 0.5996,
      "step": 10260
    },
    {
      "epoch": 0.32656691762762474,
      "grad_norm": 0.91109299659729,
      "learning_rate": 0.00015738994696499055,
      "loss": 0.5996,
      "step": 10280
    },
    {
      "epoch": 0.3272022618253439,
      "grad_norm": 0.8995181322097778,
      "learning_rate": 0.00015722130974969421,
      "loss": 0.5798,
      "step": 10300
    },
    {
      "epoch": 0.327837606023063,
      "grad_norm": 1.1067475080490112,
      "learning_rate": 0.00015705243023631652,
      "loss": 0.5983,
      "step": 10320
    },
    {
      "epoch": 0.3284729502207821,
      "grad_norm": 1.0324633121490479,
      "learning_rate": 0.00015688330913996135,
      "loss": 0.6011,
      "step": 10340
    },
    {
      "epoch": 0.3291082944185012,
      "grad_norm": 1.0662481784820557,
      "learning_rate": 0.0001567139471767556,
      "loss": 0.6254,
      "step": 10360
    },
    {
      "epoch": 0.32974363861622036,
      "grad_norm": 0.9539555907249451,
      "learning_rate": 0.00015654434506384607,
      "loss": 0.6176,
      "step": 10380
    },
    {
      "epoch": 0.33037898281393946,
      "grad_norm": 0.7341588139533997,
      "learning_rate": 0.00015637450351939637,
      "loss": 0.5852,
      "step": 10400
    },
    {
      "epoch": 0.33101432701165856,
      "grad_norm": 0.9077139496803284,
      "learning_rate": 0.00015620442326258414,
      "loss": 0.609,
      "step": 10420
    },
    {
      "epoch": 0.33164967120937766,
      "grad_norm": 1.083999752998352,
      "learning_rate": 0.00015603410501359766,
      "loss": 0.5768,
      "step": 10440
    },
    {
      "epoch": 0.3322850154070968,
      "grad_norm": 0.9190422296524048,
      "learning_rate": 0.000155863549493633,
      "loss": 0.5845,
      "step": 10460
    },
    {
      "epoch": 0.3329203596048159,
      "grad_norm": 1.0731889009475708,
      "learning_rate": 0.000155692757424891,
      "loss": 0.5988,
      "step": 10480
    },
    {
      "epoch": 0.333555703802535,
      "grad_norm": 0.9898316264152527,
      "learning_rate": 0.00015552172953057407,
      "loss": 0.5918,
      "step": 10500
    },
    {
      "epoch": 0.3341910480002541,
      "grad_norm": 1.135695219039917,
      "learning_rate": 0.00015535046653488322,
      "loss": 0.5882,
      "step": 10520
    },
    {
      "epoch": 0.3348263921979733,
      "grad_norm": 1.0453022718429565,
      "learning_rate": 0.000155178969163015,
      "loss": 0.609,
      "step": 10540
    },
    {
      "epoch": 0.3354617363956924,
      "grad_norm": 0.9859703183174133,
      "learning_rate": 0.00015500723814115835,
      "loss": 0.5899,
      "step": 10560
    },
    {
      "epoch": 0.3360970805934115,
      "grad_norm": 1.031168818473816,
      "learning_rate": 0.00015483527419649163,
      "loss": 0.5987,
      "step": 10580
    },
    {
      "epoch": 0.3367324247911306,
      "grad_norm": 1.1591908931732178,
      "learning_rate": 0.00015466307805717951,
      "loss": 0.6191,
      "step": 10600
    },
    {
      "epoch": 0.33736776898884974,
      "grad_norm": 0.8246921896934509,
      "learning_rate": 0.00015449065045236977,
      "loss": 0.6098,
      "step": 10620
    },
    {
      "epoch": 0.33800311318656884,
      "grad_norm": 0.8392571210861206,
      "learning_rate": 0.0001543179921121904,
      "loss": 0.5675,
      "step": 10640
    },
    {
      "epoch": 0.33863845738428794,
      "grad_norm": 0.8678343892097473,
      "learning_rate": 0.00015414510376774633,
      "loss": 0.5721,
      "step": 10660
    },
    {
      "epoch": 0.33927380158200704,
      "grad_norm": 0.8436061143875122,
      "learning_rate": 0.00015397198615111653,
      "loss": 0.5703,
      "step": 10680
    },
    {
      "epoch": 0.33990914577972614,
      "grad_norm": 0.9926438927650452,
      "learning_rate": 0.00015379863999535074,
      "loss": 0.6049,
      "step": 10700
    },
    {
      "epoch": 0.3405444899774453,
      "grad_norm": 1.098764419555664,
      "learning_rate": 0.00015362506603446637,
      "loss": 0.6007,
      "step": 10720
    },
    {
      "epoch": 0.3411798341751644,
      "grad_norm": 1.052038311958313,
      "learning_rate": 0.00015345126500344554,
      "loss": 0.5865,
      "step": 10740
    },
    {
      "epoch": 0.3418151783728835,
      "grad_norm": 0.8772541880607605,
      "learning_rate": 0.00015327723763823188,
      "loss": 0.6066,
      "step": 10760
    },
    {
      "epoch": 0.3424505225706026,
      "grad_norm": 0.7938296794891357,
      "learning_rate": 0.00015310298467572733,
      "loss": 0.5467,
      "step": 10780
    },
    {
      "epoch": 0.34308586676832176,
      "grad_norm": 1.0938440561294556,
      "learning_rate": 0.00015292850685378915,
      "loss": 0.5916,
      "step": 10800
    },
    {
      "epoch": 0.34372121096604086,
      "grad_norm": 0.8460657000541687,
      "learning_rate": 0.00015275380491122672,
      "loss": 0.603,
      "step": 10820
    },
    {
      "epoch": 0.34435655516375996,
      "grad_norm": 0.8238389492034912,
      "learning_rate": 0.00015257887958779854,
      "loss": 0.5808,
      "step": 10840
    },
    {
      "epoch": 0.34499189936147906,
      "grad_norm": 0.8064368367195129,
      "learning_rate": 0.0001524037316242088,
      "loss": 0.5862,
      "step": 10860
    },
    {
      "epoch": 0.3456272435591982,
      "grad_norm": 1.2068203687667847,
      "learning_rate": 0.00015222836176210467,
      "loss": 0.5694,
      "step": 10880
    },
    {
      "epoch": 0.3462625877569173,
      "grad_norm": 0.9752914309501648,
      "learning_rate": 0.00015205277074407266,
      "loss": 0.5367,
      "step": 10900
    },
    {
      "epoch": 0.3468979319546364,
      "grad_norm": 0.9989959597587585,
      "learning_rate": 0.00015187695931363602,
      "loss": 0.5712,
      "step": 10920
    },
    {
      "epoch": 0.3475332761523555,
      "grad_norm": 0.8734492659568787,
      "learning_rate": 0.00015170092821525114,
      "loss": 0.6029,
      "step": 10940
    },
    {
      "epoch": 0.3481686203500747,
      "grad_norm": 0.8759735822677612,
      "learning_rate": 0.00015152467819430458,
      "loss": 0.5676,
      "step": 10960
    },
    {
      "epoch": 0.3488039645477938,
      "grad_norm": 0.8554444909095764,
      "learning_rate": 0.00015134820999711,
      "loss": 0.5664,
      "step": 10980
    },
    {
      "epoch": 0.3494393087455129,
      "grad_norm": 0.730451762676239,
      "learning_rate": 0.00015117152437090482,
      "loss": 0.5735,
      "step": 11000
    },
    {
      "epoch": 0.3494393087455129,
      "eval_loss": 0.5449489951133728,
      "eval_runtime": 44.9152,
      "eval_samples_per_second": 60.18,
      "eval_steps_per_second": 30.101,
      "step": 11000
    },
    {
      "epoch": 0.350074652943232,
      "grad_norm": 0.7964712381362915,
      "learning_rate": 0.00015099462206384718,
      "loss": 0.5943,
      "step": 11020
    },
    {
      "epoch": 0.35070999714095114,
      "grad_norm": 0.809177577495575,
      "learning_rate": 0.00015081750382501277,
      "loss": 0.5986,
      "step": 11040
    },
    {
      "epoch": 0.35134534133867024,
      "grad_norm": 0.9207815527915955,
      "learning_rate": 0.00015064017040439148,
      "loss": 0.559,
      "step": 11060
    },
    {
      "epoch": 0.35198068553638934,
      "grad_norm": 0.9813947677612305,
      "learning_rate": 0.0001504626225528845,
      "loss": 0.5529,
      "step": 11080
    },
    {
      "epoch": 0.35261602973410844,
      "grad_norm": 0.9409967660903931,
      "learning_rate": 0.00015028486102230105,
      "loss": 0.5725,
      "step": 11100
    },
    {
      "epoch": 0.3532513739318276,
      "grad_norm": 0.9317089319229126,
      "learning_rate": 0.000150106886565355,
      "loss": 0.5568,
      "step": 11120
    },
    {
      "epoch": 0.3538867181295467,
      "grad_norm": 1.025341510772705,
      "learning_rate": 0.00014992869993566194,
      "loss": 0.5555,
      "step": 11140
    },
    {
      "epoch": 0.3545220623272658,
      "grad_norm": 1.0014809370040894,
      "learning_rate": 0.00014975030188773585,
      "loss": 0.5922,
      "step": 11160
    },
    {
      "epoch": 0.3551574065249849,
      "grad_norm": 0.9769735336303711,
      "learning_rate": 0.00014957169317698593,
      "loss": 0.583,
      "step": 11180
    },
    {
      "epoch": 0.355792750722704,
      "grad_norm": 0.8555041551589966,
      "learning_rate": 0.0001493928745597134,
      "loss": 0.5609,
      "step": 11200
    },
    {
      "epoch": 0.35642809492042316,
      "grad_norm": 0.9463367462158203,
      "learning_rate": 0.0001492138467931084,
      "loss": 0.5783,
      "step": 11220
    },
    {
      "epoch": 0.35706343911814226,
      "grad_norm": 0.9429970979690552,
      "learning_rate": 0.00014903461063524661,
      "loss": 0.5934,
      "step": 11240
    },
    {
      "epoch": 0.35769878331586136,
      "grad_norm": 1.4683854579925537,
      "learning_rate": 0.00014885516684508612,
      "loss": 0.5939,
      "step": 11260
    },
    {
      "epoch": 0.35833412751358046,
      "grad_norm": 0.825720489025116,
      "learning_rate": 0.00014867551618246428,
      "loss": 0.5685,
      "step": 11280
    },
    {
      "epoch": 0.3589694717112996,
      "grad_norm": 1.001832127571106,
      "learning_rate": 0.00014849565940809432,
      "loss": 0.5837,
      "step": 11300
    },
    {
      "epoch": 0.3596048159090187,
      "grad_norm": 0.9406988024711609,
      "learning_rate": 0.00014831559728356234,
      "loss": 0.5864,
      "step": 11320
    },
    {
      "epoch": 0.3602401601067378,
      "grad_norm": 0.7483388185501099,
      "learning_rate": 0.00014813533057132393,
      "loss": 0.5991,
      "step": 11340
    },
    {
      "epoch": 0.3608755043044569,
      "grad_norm": 0.8849460482597351,
      "learning_rate": 0.00014795486003470093,
      "loss": 0.5821,
      "step": 11360
    },
    {
      "epoch": 0.3615108485021761,
      "grad_norm": 0.7930045127868652,
      "learning_rate": 0.00014777418643787836,
      "loss": 0.5395,
      "step": 11380
    },
    {
      "epoch": 0.3621461926998952,
      "grad_norm": 0.9285226464271545,
      "learning_rate": 0.000147593310545901,
      "loss": 0.5713,
      "step": 11400
    },
    {
      "epoch": 0.3627815368976143,
      "grad_norm": 1.0233609676361084,
      "learning_rate": 0.00014741223312467026,
      "loss": 0.5875,
      "step": 11420
    },
    {
      "epoch": 0.3634168810953334,
      "grad_norm": 1.033948302268982,
      "learning_rate": 0.00014723095494094092,
      "loss": 0.5993,
      "step": 11440
    },
    {
      "epoch": 0.36405222529305253,
      "grad_norm": 0.9479451179504395,
      "learning_rate": 0.00014704947676231784,
      "loss": 0.571,
      "step": 11460
    },
    {
      "epoch": 0.36468756949077163,
      "grad_norm": 0.7781844735145569,
      "learning_rate": 0.0001468677993572528,
      "loss": 0.5503,
      "step": 11480
    },
    {
      "epoch": 0.36532291368849074,
      "grad_norm": 0.9249241352081299,
      "learning_rate": 0.00014668592349504101,
      "loss": 0.574,
      "step": 11500
    },
    {
      "epoch": 0.36595825788620984,
      "grad_norm": 0.9108446836471558,
      "learning_rate": 0.00014650384994581824,
      "loss": 0.557,
      "step": 11520
    },
    {
      "epoch": 0.366593602083929,
      "grad_norm": 1.0099608898162842,
      "learning_rate": 0.0001463215794805573,
      "loss": 0.5605,
      "step": 11540
    },
    {
      "epoch": 0.3672289462816481,
      "grad_norm": 0.8376953601837158,
      "learning_rate": 0.00014613911287106467,
      "loss": 0.538,
      "step": 11560
    },
    {
      "epoch": 0.3678642904793672,
      "grad_norm": 0.8893873691558838,
      "learning_rate": 0.00014595645088997757,
      "loss": 0.5606,
      "step": 11580
    },
    {
      "epoch": 0.3684996346770863,
      "grad_norm": 1.1310006380081177,
      "learning_rate": 0.00014577359431076046,
      "loss": 0.5612,
      "step": 11600
    },
    {
      "epoch": 0.36913497887480545,
      "grad_norm": 0.8577033281326294,
      "learning_rate": 0.00014559054390770167,
      "loss": 0.5688,
      "step": 11620
    },
    {
      "epoch": 0.36977032307252455,
      "grad_norm": 0.9386855959892273,
      "learning_rate": 0.00014540730045591044,
      "loss": 0.5614,
      "step": 11640
    },
    {
      "epoch": 0.37040566727024365,
      "grad_norm": 0.9492216110229492,
      "learning_rate": 0.00014522386473131332,
      "loss": 0.5878,
      "step": 11660
    },
    {
      "epoch": 0.37104101146796276,
      "grad_norm": 0.853327751159668,
      "learning_rate": 0.00014504023751065115,
      "loss": 0.5568,
      "step": 11680
    },
    {
      "epoch": 0.37167635566568186,
      "grad_norm": 0.7977784872055054,
      "learning_rate": 0.00014485641957147553,
      "loss": 0.5428,
      "step": 11700
    },
    {
      "epoch": 0.372311699863401,
      "grad_norm": 1.1006829738616943,
      "learning_rate": 0.00014467241169214567,
      "loss": 0.559,
      "step": 11720
    },
    {
      "epoch": 0.3729470440611201,
      "grad_norm": 1.08724045753479,
      "learning_rate": 0.0001444882146518251,
      "loss": 0.5642,
      "step": 11740
    },
    {
      "epoch": 0.3735823882588392,
      "grad_norm": 1.0295459032058716,
      "learning_rate": 0.00014430382923047831,
      "loss": 0.5969,
      "step": 11760
    },
    {
      "epoch": 0.3742177324565583,
      "grad_norm": 1.1096023321151733,
      "learning_rate": 0.00014411925620886742,
      "loss": 0.5678,
      "step": 11780
    },
    {
      "epoch": 0.3748530766542775,
      "grad_norm": 0.9315259456634521,
      "learning_rate": 0.000143934496368549,
      "loss": 0.5728,
      "step": 11800
    },
    {
      "epoch": 0.3754884208519966,
      "grad_norm": 0.9581449031829834,
      "learning_rate": 0.00014374955049187066,
      "loss": 0.5485,
      "step": 11820
    },
    {
      "epoch": 0.3761237650497157,
      "grad_norm": 1.472161054611206,
      "learning_rate": 0.00014356441936196776,
      "loss": 0.5931,
      "step": 11840
    },
    {
      "epoch": 0.3767591092474348,
      "grad_norm": 1.0234733819961548,
      "learning_rate": 0.00014337910376276011,
      "loss": 0.5635,
      "step": 11860
    },
    {
      "epoch": 0.37739445344515393,
      "grad_norm": 0.9299212694168091,
      "learning_rate": 0.00014319360447894862,
      "loss": 0.5802,
      "step": 11880
    },
    {
      "epoch": 0.37802979764287303,
      "grad_norm": 0.853388786315918,
      "learning_rate": 0.00014300792229601198,
      "loss": 0.5645,
      "step": 11900
    },
    {
      "epoch": 0.37866514184059213,
      "grad_norm": 0.9909472465515137,
      "learning_rate": 0.0001428220580002034,
      "loss": 0.5451,
      "step": 11920
    },
    {
      "epoch": 0.37930048603831124,
      "grad_norm": 0.8121063113212585,
      "learning_rate": 0.00014263601237854716,
      "loss": 0.5514,
      "step": 11940
    },
    {
      "epoch": 0.3799358302360304,
      "grad_norm": 0.9053930044174194,
      "learning_rate": 0.00014244978621883543,
      "loss": 0.5371,
      "step": 11960
    },
    {
      "epoch": 0.3805711744337495,
      "grad_norm": 1.0551111698150635,
      "learning_rate": 0.00014226338030962475,
      "loss": 0.5862,
      "step": 11980
    },
    {
      "epoch": 0.3812065186314686,
      "grad_norm": 0.8897386193275452,
      "learning_rate": 0.0001420767954402329,
      "loss": 0.5439,
      "step": 12000
    },
    {
      "epoch": 0.3812065186314686,
      "eval_loss": 0.5259391665458679,
      "eval_runtime": 45.0289,
      "eval_samples_per_second": 60.028,
      "eval_steps_per_second": 30.025,
      "step": 12000
    },
    {
      "epoch": 0.3818418628291877,
      "grad_norm": 0.8436812162399292,
      "learning_rate": 0.00014189003240073535,
      "loss": 0.5684,
      "step": 12020
    },
    {
      "epoch": 0.38247720702690685,
      "grad_norm": 1.2769359350204468,
      "learning_rate": 0.0001417030919819621,
      "loss": 0.5483,
      "step": 12040
    },
    {
      "epoch": 0.38311255122462595,
      "grad_norm": 0.8915470838546753,
      "learning_rate": 0.0001415159749754942,
      "loss": 0.5674,
      "step": 12060
    },
    {
      "epoch": 0.38374789542234505,
      "grad_norm": 1.1026362180709839,
      "learning_rate": 0.00014132868217366044,
      "loss": 0.5868,
      "step": 12080
    },
    {
      "epoch": 0.38438323962006415,
      "grad_norm": 0.92413729429245,
      "learning_rate": 0.00014114121436953402,
      "loss": 0.5602,
      "step": 12100
    },
    {
      "epoch": 0.3850185838177833,
      "grad_norm": 0.8880215287208557,
      "learning_rate": 0.0001409535723569291,
      "loss": 0.563,
      "step": 12120
    },
    {
      "epoch": 0.3856539280155024,
      "grad_norm": 0.7865646481513977,
      "learning_rate": 0.00014076575693039767,
      "loss": 0.5731,
      "step": 12140
    },
    {
      "epoch": 0.3862892722132215,
      "grad_norm": 0.8817760348320007,
      "learning_rate": 0.00014057776888522583,
      "loss": 0.5205,
      "step": 12160
    },
    {
      "epoch": 0.3869246164109406,
      "grad_norm": 0.7473212480545044,
      "learning_rate": 0.0001403896090174307,
      "loss": 0.5494,
      "step": 12180
    },
    {
      "epoch": 0.3875599606086597,
      "grad_norm": 0.9429736137390137,
      "learning_rate": 0.0001402012781237571,
      "loss": 0.551,
      "step": 12200
    },
    {
      "epoch": 0.38819530480637887,
      "grad_norm": 0.9144492149353027,
      "learning_rate": 0.00014001277700167382,
      "loss": 0.529,
      "step": 12220
    },
    {
      "epoch": 0.388830649004098,
      "grad_norm": 0.8465405702590942,
      "learning_rate": 0.00013982410644937057,
      "loss": 0.566,
      "step": 12240
    },
    {
      "epoch": 0.3894659932018171,
      "grad_norm": 0.8520842790603638,
      "learning_rate": 0.00013963526726575446,
      "loss": 0.61,
      "step": 12260
    },
    {
      "epoch": 0.3901013373995362,
      "grad_norm": 0.8384197354316711,
      "learning_rate": 0.00013944626025044673,
      "loss": 0.563,
      "step": 12280
    },
    {
      "epoch": 0.39073668159725533,
      "grad_norm": 0.9083155989646912,
      "learning_rate": 0.00013925708620377927,
      "loss": 0.5433,
      "step": 12300
    },
    {
      "epoch": 0.39137202579497443,
      "grad_norm": 1.0582692623138428,
      "learning_rate": 0.00013906774592679116,
      "loss": 0.5368,
      "step": 12320
    },
    {
      "epoch": 0.39200736999269353,
      "grad_norm": 0.8538171648979187,
      "learning_rate": 0.00013887824022122537,
      "loss": 0.5217,
      "step": 12340
    },
    {
      "epoch": 0.39264271419041263,
      "grad_norm": 0.8264597058296204,
      "learning_rate": 0.00013868856988952556,
      "loss": 0.5564,
      "step": 12360
    },
    {
      "epoch": 0.3932780583881318,
      "grad_norm": 0.8192921280860901,
      "learning_rate": 0.00013849873573483222,
      "loss": 0.6058,
      "step": 12380
    },
    {
      "epoch": 0.3939134025858509,
      "grad_norm": 0.8523415923118591,
      "learning_rate": 0.00013830873856097964,
      "loss": 0.5565,
      "step": 12400
    },
    {
      "epoch": 0.39454874678357,
      "grad_norm": 1.0821831226348877,
      "learning_rate": 0.00013811857917249253,
      "loss": 0.5617,
      "step": 12420
    },
    {
      "epoch": 0.3951840909812891,
      "grad_norm": 0.8053098917007446,
      "learning_rate": 0.00013792825837458225,
      "loss": 0.579,
      "step": 12440
    },
    {
      "epoch": 0.39581943517900825,
      "grad_norm": 0.9511120319366455,
      "learning_rate": 0.00013773777697314378,
      "loss": 0.5417,
      "step": 12460
    },
    {
      "epoch": 0.39645477937672735,
      "grad_norm": 1.0273131132125854,
      "learning_rate": 0.00013754713577475213,
      "loss": 0.582,
      "step": 12480
    },
    {
      "epoch": 0.39709012357444645,
      "grad_norm": 1.0347099304199219,
      "learning_rate": 0.00013735633558665893,
      "loss": 0.5679,
      "step": 12500
    },
    {
      "epoch": 0.39772546777216555,
      "grad_norm": 1.0762611627578735,
      "learning_rate": 0.00013716537721678907,
      "loss": 0.5483,
      "step": 12520
    },
    {
      "epoch": 0.3983608119698847,
      "grad_norm": 1.4243688583374023,
      "learning_rate": 0.00013697426147373721,
      "loss": 0.5558,
      "step": 12540
    },
    {
      "epoch": 0.3989961561676038,
      "grad_norm": 0.7539466023445129,
      "learning_rate": 0.00013678298916676445,
      "loss": 0.5404,
      "step": 12560
    },
    {
      "epoch": 0.3996315003653229,
      "grad_norm": 0.7736854553222656,
      "learning_rate": 0.00013659156110579476,
      "loss": 0.5578,
      "step": 12580
    },
    {
      "epoch": 0.400266844563042,
      "grad_norm": 0.9489171504974365,
      "learning_rate": 0.0001363999781014117,
      "loss": 0.5668,
      "step": 12600
    },
    {
      "epoch": 0.40090218876076117,
      "grad_norm": 0.9692643880844116,
      "learning_rate": 0.00013621783146979094,
      "loss": 0.5663,
      "step": 12620
    },
    {
      "epoch": 0.40153753295848027,
      "grad_norm": 1.0705336332321167,
      "learning_rate": 0.00013602594865967435,
      "loss": 0.5293,
      "step": 12640
    },
    {
      "epoch": 0.40217287715619937,
      "grad_norm": 1.0149205923080444,
      "learning_rate": 0.00013583391330117533,
      "loss": 0.5348,
      "step": 12660
    },
    {
      "epoch": 0.40280822135391847,
      "grad_norm": 0.9088581204414368,
      "learning_rate": 0.00013564172620744906,
      "loss": 0.5677,
      "step": 12680
    },
    {
      "epoch": 0.4034435655516376,
      "grad_norm": 1.1513986587524414,
      "learning_rate": 0.00013544938819229306,
      "loss": 0.569,
      "step": 12700
    },
    {
      "epoch": 0.40407890974935673,
      "grad_norm": 0.8725998401641846,
      "learning_rate": 0.00013525690007014406,
      "loss": 0.5692,
      "step": 12720
    },
    {
      "epoch": 0.40471425394707583,
      "grad_norm": 1.0663046836853027,
      "learning_rate": 0.00013506426265607425,
      "loss": 0.567,
      "step": 12740
    },
    {
      "epoch": 0.40534959814479493,
      "grad_norm": 0.9139559864997864,
      "learning_rate": 0.00013487147676578812,
      "loss": 0.5465,
      "step": 12760
    },
    {
      "epoch": 0.40598494234251403,
      "grad_norm": 1.3140777349472046,
      "learning_rate": 0.00013467854321561878,
      "loss": 0.5407,
      "step": 12780
    },
    {
      "epoch": 0.4066202865402332,
      "grad_norm": 0.8671903610229492,
      "learning_rate": 0.00013448546282252458,
      "loss": 0.5303,
      "step": 12800
    },
    {
      "epoch": 0.4072556307379523,
      "grad_norm": 0.692545473575592,
      "learning_rate": 0.00013429223640408578,
      "loss": 0.5333,
      "step": 12820
    },
    {
      "epoch": 0.4078909749356714,
      "grad_norm": 1.1087654829025269,
      "learning_rate": 0.00013409886477850087,
      "loss": 0.5493,
      "step": 12840
    },
    {
      "epoch": 0.4085263191333905,
      "grad_norm": 0.9659181833267212,
      "learning_rate": 0.00013390534876458319,
      "loss": 0.5902,
      "step": 12860
    },
    {
      "epoch": 0.40916166333110965,
      "grad_norm": 0.7794270515441895,
      "learning_rate": 0.00013371168918175754,
      "loss": 0.5647,
      "step": 12880
    },
    {
      "epoch": 0.40979700752882875,
      "grad_norm": 0.910505473613739,
      "learning_rate": 0.00013351788685005662,
      "loss": 0.5752,
      "step": 12900
    },
    {
      "epoch": 0.41043235172654785,
      "grad_norm": 0.9549837112426758,
      "learning_rate": 0.00013332394259011758,
      "loss": 0.5424,
      "step": 12920
    },
    {
      "epoch": 0.41106769592426695,
      "grad_norm": 1.2679826021194458,
      "learning_rate": 0.00013312985722317862,
      "loss": 0.5285,
      "step": 12940
    },
    {
      "epoch": 0.4117030401219861,
      "grad_norm": 0.8822807669639587,
      "learning_rate": 0.0001329356315710753,
      "loss": 0.5662,
      "step": 12960
    },
    {
      "epoch": 0.4123383843197052,
      "grad_norm": 0.8247064352035522,
      "learning_rate": 0.0001327412664562373,
      "loss": 0.5338,
      "step": 12980
    },
    {
      "epoch": 0.4129737285174243,
      "grad_norm": 0.8655696511268616,
      "learning_rate": 0.0001325467627016849,
      "loss": 0.5563,
      "step": 13000
    },
    {
      "epoch": 0.4129737285174243,
      "eval_loss": 0.5103311538696289,
      "eval_runtime": 44.4811,
      "eval_samples_per_second": 60.767,
      "eval_steps_per_second": 30.395,
      "step": 13000
    },
    {
      "epoch": 0.4136090727151434,
      "grad_norm": 1.1745620965957642,
      "learning_rate": 0.00013235212113102532,
      "loss": 0.5432,
      "step": 13020
    },
    {
      "epoch": 0.41424441691286257,
      "grad_norm": 1.375957727432251,
      "learning_rate": 0.0001321573425684494,
      "loss": 0.5518,
      "step": 13040
    },
    {
      "epoch": 0.41487976111058167,
      "grad_norm": 1.2425376176834106,
      "learning_rate": 0.00013196242783872805,
      "loss": 0.5667,
      "step": 13060
    },
    {
      "epoch": 0.41551510530830077,
      "grad_norm": 0.9375765919685364,
      "learning_rate": 0.00013176737776720876,
      "loss": 0.5629,
      "step": 13080
    },
    {
      "epoch": 0.41615044950601987,
      "grad_norm": 0.9392895698547363,
      "learning_rate": 0.00013157219317981217,
      "loss": 0.5577,
      "step": 13100
    },
    {
      "epoch": 0.416785793703739,
      "grad_norm": 0.9028527140617371,
      "learning_rate": 0.00013137687490302844,
      "loss": 0.5358,
      "step": 13120
    },
    {
      "epoch": 0.41742113790145813,
      "grad_norm": 0.9373983144760132,
      "learning_rate": 0.00013118142376391381,
      "loss": 0.5517,
      "step": 13140
    },
    {
      "epoch": 0.41805648209917723,
      "grad_norm": 1.3339825868606567,
      "learning_rate": 0.00013098584059008725,
      "loss": 0.5512,
      "step": 13160
    },
    {
      "epoch": 0.41869182629689633,
      "grad_norm": 0.7137243747711182,
      "learning_rate": 0.00013079012620972663,
      "loss": 0.5464,
      "step": 13180
    },
    {
      "epoch": 0.41932717049461543,
      "grad_norm": 1.1450612545013428,
      "learning_rate": 0.00013059428145156555,
      "loss": 0.564,
      "step": 13200
    },
    {
      "epoch": 0.4199625146923346,
      "grad_norm": 1.2148438692092896,
      "learning_rate": 0.00013039830714488965,
      "loss": 0.5555,
      "step": 13220
    },
    {
      "epoch": 0.4205978588900537,
      "grad_norm": 1.277346134185791,
      "learning_rate": 0.00013020220411953304,
      "loss": 0.5898,
      "step": 13240
    },
    {
      "epoch": 0.4212332030877728,
      "grad_norm": 1.0933984518051147,
      "learning_rate": 0.00013000597320587492,
      "loss": 0.553,
      "step": 13260
    },
    {
      "epoch": 0.4218685472854919,
      "grad_norm": 0.7297493815422058,
      "learning_rate": 0.00012980961523483616,
      "loss": 0.5626,
      "step": 13280
    },
    {
      "epoch": 0.42250389148321105,
      "grad_norm": 0.8859849572181702,
      "learning_rate": 0.00012961313103787548,
      "loss": 0.5455,
      "step": 13300
    },
    {
      "epoch": 0.42313923568093015,
      "grad_norm": 0.9647216200828552,
      "learning_rate": 0.00012941652144698608,
      "loss": 0.5157,
      "step": 13320
    },
    {
      "epoch": 0.42377457987864925,
      "grad_norm": 0.9097155332565308,
      "learning_rate": 0.00012921978729469222,
      "loss": 0.542,
      "step": 13340
    },
    {
      "epoch": 0.42440992407636835,
      "grad_norm": 1.0074721574783325,
      "learning_rate": 0.0001290229294140456,
      "loss": 0.5319,
      "step": 13360
    },
    {
      "epoch": 0.4250452682740875,
      "grad_norm": 0.7759230732917786,
      "learning_rate": 0.0001288259486386218,
      "loss": 0.4939,
      "step": 13380
    },
    {
      "epoch": 0.4256806124718066,
      "grad_norm": 0.8912795782089233,
      "learning_rate": 0.00012862884580251675,
      "loss": 0.5276,
      "step": 13400
    },
    {
      "epoch": 0.4263159566695257,
      "grad_norm": 1.090395450592041,
      "learning_rate": 0.00012843162174034332,
      "loss": 0.5227,
      "step": 13420
    },
    {
      "epoch": 0.4269513008672448,
      "grad_norm": 0.8524248003959656,
      "learning_rate": 0.00012823427728722762,
      "loss": 0.5438,
      "step": 13440
    },
    {
      "epoch": 0.42758664506496397,
      "grad_norm": 1.209073543548584,
      "learning_rate": 0.0001280368132788056,
      "loss": 0.5495,
      "step": 13460
    },
    {
      "epoch": 0.42822198926268307,
      "grad_norm": 0.9301733374595642,
      "learning_rate": 0.00012783923055121945,
      "loss": 0.5411,
      "step": 13480
    },
    {
      "epoch": 0.42885733346040217,
      "grad_norm": 0.916028618812561,
      "learning_rate": 0.000127641529941114,
      "loss": 0.5674,
      "step": 13500
    },
    {
      "epoch": 0.42949267765812127,
      "grad_norm": 0.9181066751480103,
      "learning_rate": 0.00012744371228563334,
      "loss": 0.5522,
      "step": 13520
    },
    {
      "epoch": 0.4301280218558404,
      "grad_norm": 1.2208302021026611,
      "learning_rate": 0.0001272457784224171,
      "loss": 0.5428,
      "step": 13540
    },
    {
      "epoch": 0.4307633660535595,
      "grad_norm": 0.8382121920585632,
      "learning_rate": 0.00012704772918959706,
      "loss": 0.5347,
      "step": 13560
    },
    {
      "epoch": 0.4313987102512786,
      "grad_norm": 0.7942314147949219,
      "learning_rate": 0.0001268495654257934,
      "loss": 0.5455,
      "step": 13580
    },
    {
      "epoch": 0.43203405444899773,
      "grad_norm": 1.0586442947387695,
      "learning_rate": 0.00012665128797011138,
      "loss": 0.5588,
      "step": 13600
    },
    {
      "epoch": 0.4326693986467169,
      "grad_norm": 0.9026583433151245,
      "learning_rate": 0.00012645289766213764,
      "loss": 0.5448,
      "step": 13620
    },
    {
      "epoch": 0.433304742844436,
      "grad_norm": 1.107459545135498,
      "learning_rate": 0.0001262643231052632,
      "loss": 0.5226,
      "step": 13640
    },
    {
      "epoch": 0.4339400870421551,
      "grad_norm": 0.7181698679924011,
      "learning_rate": 0.00012606571515198816,
      "loss": 0.5587,
      "step": 13660
    },
    {
      "epoch": 0.4345754312398742,
      "grad_norm": 0.850642740726471,
      "learning_rate": 0.0001258669968259726,
      "loss": 0.5514,
      "step": 13680
    },
    {
      "epoch": 0.4352107754375933,
      "grad_norm": 0.9803110957145691,
      "learning_rate": 0.00012567811294990802,
      "loss": 0.5612,
      "step": 13700
    },
    {
      "epoch": 0.43584611963531245,
      "grad_norm": 0.8320556282997131,
      "learning_rate": 0.00012547918181770158,
      "loss": 0.5464,
      "step": 13720
    },
    {
      "epoch": 0.43648146383303155,
      "grad_norm": 0.9645776152610779,
      "learning_rate": 0.0001252801427963731,
      "loss": 0.5394,
      "step": 13740
    },
    {
      "epoch": 0.43711680803075065,
      "grad_norm": 0.981066107749939,
      "learning_rate": 0.00012508099672873401,
      "loss": 0.5518,
      "step": 13760
    },
    {
      "epoch": 0.43775215222846975,
      "grad_norm": 0.950231671333313,
      "learning_rate": 0.00012488174445804905,
      "loss": 0.5628,
      "step": 13780
    },
    {
      "epoch": 0.4383874964261889,
      "grad_norm": 0.7942489981651306,
      "learning_rate": 0.00012468238682803256,
      "loss": 0.5682,
      "step": 13800
    },
    {
      "epoch": 0.439022840623908,
      "grad_norm": 0.9598709940910339,
      "learning_rate": 0.0001244829246828451,
      "loss": 0.5398,
      "step": 13820
    },
    {
      "epoch": 0.4396581848216271,
      "grad_norm": 0.9328323602676392,
      "learning_rate": 0.0001242833588670898,
      "loss": 0.5465,
      "step": 13840
    },
    {
      "epoch": 0.4402935290193462,
      "grad_norm": 0.9036662578582764,
      "learning_rate": 0.00012408369022580865,
      "loss": 0.5307,
      "step": 13860
    },
    {
      "epoch": 0.44092887321706536,
      "grad_norm": 1.1593483686447144,
      "learning_rate": 0.0001238839196044792,
      "loss": 0.5838,
      "step": 13880
    },
    {
      "epoch": 0.44156421741478447,
      "grad_norm": 0.9283963441848755,
      "learning_rate": 0.0001236840478490107,
      "loss": 0.5112,
      "step": 13900
    },
    {
      "epoch": 0.44219956161250357,
      "grad_norm": 1.1374804973602295,
      "learning_rate": 0.00012348407580574068,
      "loss": 0.5616,
      "step": 13920
    },
    {
      "epoch": 0.44283490581022267,
      "grad_norm": 0.8757379055023193,
      "learning_rate": 0.00012328400432143143,
      "loss": 0.5409,
      "step": 13940
    },
    {
      "epoch": 0.4434702500079418,
      "grad_norm": 0.9971847534179688,
      "learning_rate": 0.00012308383424326617,
      "loss": 0.5573,
      "step": 13960
    },
    {
      "epoch": 0.4441055942056609,
      "grad_norm": 0.8985651135444641,
      "learning_rate": 0.00012288356641884567,
      "loss": 0.5602,
      "step": 13980
    },
    {
      "epoch": 0.44474093840338,
      "grad_norm": 0.8877219557762146,
      "learning_rate": 0.0001226832016961846,
      "loss": 0.5418,
      "step": 14000
    },
    {
      "epoch": 0.44474093840338,
      "eval_loss": 0.49767744541168213,
      "eval_runtime": 45.8378,
      "eval_samples_per_second": 58.969,
      "eval_steps_per_second": 29.495,
      "step": 14000
    },
    {
      "epoch": 0.4453762826010991,
      "grad_norm": 0.9760685563087463,
      "learning_rate": 0.00012248274092370795,
      "loss": 0.5386,
      "step": 14020
    },
    {
      "epoch": 0.4460116267988183,
      "grad_norm": 0.9159601330757141,
      "learning_rate": 0.00012228218495024734,
      "loss": 0.5658,
      "step": 14040
    },
    {
      "epoch": 0.4466469709965374,
      "grad_norm": 0.9726976752281189,
      "learning_rate": 0.00012208153462503764,
      "loss": 0.5619,
      "step": 14060
    },
    {
      "epoch": 0.4472823151942565,
      "grad_norm": 0.8647946715354919,
      "learning_rate": 0.00012188079079771311,
      "loss": 0.5312,
      "step": 14080
    },
    {
      "epoch": 0.4479176593919756,
      "grad_norm": 0.8291323781013489,
      "learning_rate": 0.00012167995431830404,
      "loss": 0.5555,
      "step": 14100
    },
    {
      "epoch": 0.44855300358969474,
      "grad_norm": 1.1393893957138062,
      "learning_rate": 0.00012147902603723302,
      "loss": 0.5368,
      "step": 14120
    },
    {
      "epoch": 0.44918834778741384,
      "grad_norm": 0.9214714169502258,
      "learning_rate": 0.00012127800680531129,
      "loss": 0.5312,
      "step": 14140
    },
    {
      "epoch": 0.44982369198513295,
      "grad_norm": 0.7314972877502441,
      "learning_rate": 0.00012107689747373533,
      "loss": 0.5306,
      "step": 14160
    },
    {
      "epoch": 0.45045903618285205,
      "grad_norm": 0.9739118218421936,
      "learning_rate": 0.00012087569889408308,
      "loss": 0.5474,
      "step": 14180
    },
    {
      "epoch": 0.45109438038057115,
      "grad_norm": 1.1331558227539062,
      "learning_rate": 0.00012067441191831035,
      "loss": 0.5251,
      "step": 14200
    },
    {
      "epoch": 0.4517297245782903,
      "grad_norm": 0.9672099947929382,
      "learning_rate": 0.00012047303739874733,
      "loss": 0.5638,
      "step": 14220
    },
    {
      "epoch": 0.4523650687760094,
      "grad_norm": 0.9430161118507385,
      "learning_rate": 0.00012027157618809488,
      "loss": 0.5473,
      "step": 14240
    },
    {
      "epoch": 0.4530004129737285,
      "grad_norm": 0.9385126233100891,
      "learning_rate": 0.00012007002913942092,
      "loss": 0.5305,
      "step": 14260
    },
    {
      "epoch": 0.4536357571714476,
      "grad_norm": 1.2930362224578857,
      "learning_rate": 0.00011986839710615689,
      "loss": 0.5264,
      "step": 14280
    },
    {
      "epoch": 0.45427110136916676,
      "grad_norm": 1.098981499671936,
      "learning_rate": 0.00011966668094209401,
      "loss": 0.5945,
      "step": 14300
    },
    {
      "epoch": 0.45490644556688586,
      "grad_norm": 1.016724944114685,
      "learning_rate": 0.00011946488150137987,
      "loss": 0.5423,
      "step": 14320
    },
    {
      "epoch": 0.45554178976460497,
      "grad_norm": 1.3441358804702759,
      "learning_rate": 0.00011926299963851455,
      "loss": 0.5311,
      "step": 14340
    },
    {
      "epoch": 0.45617713396232407,
      "grad_norm": 0.8672164678573608,
      "learning_rate": 0.00011906103620834721,
      "loss": 0.5377,
      "step": 14360
    },
    {
      "epoch": 0.4568124781600432,
      "grad_norm": 0.8844342231750488,
      "learning_rate": 0.00011885899206607243,
      "loss": 0.5539,
      "step": 14380
    },
    {
      "epoch": 0.4574478223577623,
      "grad_norm": 1.0755807161331177,
      "learning_rate": 0.00011865686806722647,
      "loss": 0.5489,
      "step": 14400
    },
    {
      "epoch": 0.4580831665554814,
      "grad_norm": 0.8909132480621338,
      "learning_rate": 0.00011845466506768379,
      "loss": 0.5492,
      "step": 14420
    },
    {
      "epoch": 0.4587185107532005,
      "grad_norm": 0.7222205996513367,
      "learning_rate": 0.00011826249982356501,
      "loss": 0.5452,
      "step": 14440
    },
    {
      "epoch": 0.4593538549509197,
      "grad_norm": 0.8589527606964111,
      "learning_rate": 0.00011806014523563623,
      "loss": 0.5553,
      "step": 14460
    },
    {
      "epoch": 0.4599891991486388,
      "grad_norm": 0.8546582460403442,
      "learning_rate": 0.00011785771417377567,
      "loss": 0.518,
      "step": 14480
    },
    {
      "epoch": 0.4606245433463579,
      "grad_norm": 0.7938315272331238,
      "learning_rate": 0.00011765520749515795,
      "loss": 0.5732,
      "step": 14500
    },
    {
      "epoch": 0.461259887544077,
      "grad_norm": 1.030897617340088,
      "learning_rate": 0.000117452626057278,
      "loss": 0.5293,
      "step": 14520
    },
    {
      "epoch": 0.46189523174179614,
      "grad_norm": 0.9275230765342712,
      "learning_rate": 0.00011724997071794722,
      "loss": 0.5453,
      "step": 14540
    },
    {
      "epoch": 0.46253057593951524,
      "grad_norm": 0.8049765825271606,
      "learning_rate": 0.00011704724233528997,
      "loss": 0.5237,
      "step": 14560
    },
    {
      "epoch": 0.46316592013723434,
      "grad_norm": 0.9411914348602295,
      "learning_rate": 0.00011684444176773994,
      "loss": 0.5529,
      "step": 14580
    },
    {
      "epoch": 0.46380126433495344,
      "grad_norm": 1.0553874969482422,
      "learning_rate": 0.0001166415698740364,
      "loss": 0.5107,
      "step": 14600
    },
    {
      "epoch": 0.4644366085326726,
      "grad_norm": 1.1203105449676514,
      "learning_rate": 0.00011643862751322072,
      "loss": 0.5503,
      "step": 14620
    },
    {
      "epoch": 0.4650719527303917,
      "grad_norm": 0.9356998801231384,
      "learning_rate": 0.00011623561554463263,
      "loss": 0.5388,
      "step": 14640
    },
    {
      "epoch": 0.4657072969281108,
      "grad_norm": 1.0603325366973877,
      "learning_rate": 0.00011603253482790657,
      "loss": 0.5379,
      "step": 14660
    },
    {
      "epoch": 0.4663426411258299,
      "grad_norm": 0.7650070786476135,
      "learning_rate": 0.00011582938622296818,
      "loss": 0.5175,
      "step": 14680
    },
    {
      "epoch": 0.466977985323549,
      "grad_norm": 1.1926647424697876,
      "learning_rate": 0.00011562617059003044,
      "loss": 0.5558,
      "step": 14700
    },
    {
      "epoch": 0.46761332952126816,
      "grad_norm": 0.9466400742530823,
      "learning_rate": 0.00011542288878959025,
      "loss": 0.5288,
      "step": 14720
    },
    {
      "epoch": 0.46824867371898726,
      "grad_norm": 1.036163091659546,
      "learning_rate": 0.0001152195416824247,
      "loss": 0.5322,
      "step": 14740
    },
    {
      "epoch": 0.46888401791670636,
      "grad_norm": 0.8458572626113892,
      "learning_rate": 0.00011501613012958729,
      "loss": 0.5358,
      "step": 14760
    },
    {
      "epoch": 0.46951936211442546,
      "grad_norm": 0.789557695388794,
      "learning_rate": 0.00011481265499240455,
      "loss": 0.5067,
      "step": 14780
    },
    {
      "epoch": 0.4701547063121446,
      "grad_norm": 0.845371425151825,
      "learning_rate": 0.00011460911713247222,
      "loss": 0.5433,
      "step": 14800
    },
    {
      "epoch": 0.4707900505098637,
      "grad_norm": 0.8561549782752991,
      "learning_rate": 0.00011440551741165156,
      "loss": 0.5362,
      "step": 14820
    },
    {
      "epoch": 0.4714253947075828,
      "grad_norm": 0.921575665473938,
      "learning_rate": 0.00011420185669206582,
      "loss": 0.5093,
      "step": 14840
    },
    {
      "epoch": 0.4720607389053019,
      "grad_norm": 0.9392147660255432,
      "learning_rate": 0.0001139981358360966,
      "loss": 0.5419,
      "step": 14860
    },
    {
      "epoch": 0.4726960831030211,
      "grad_norm": 0.859464168548584,
      "learning_rate": 0.00011379435570638002,
      "loss": 0.5329,
      "step": 14880
    },
    {
      "epoch": 0.4733314273007402,
      "grad_norm": 0.9370890259742737,
      "learning_rate": 0.00011359051716580331,
      "loss": 0.516,
      "step": 14900
    },
    {
      "epoch": 0.4739667714984593,
      "grad_norm": 0.8993077278137207,
      "learning_rate": 0.00011338662107750098,
      "loss": 0.4785,
      "step": 14920
    },
    {
      "epoch": 0.4746021156961784,
      "grad_norm": 0.7652683854103088,
      "learning_rate": 0.00011318266830485119,
      "loss": 0.5348,
      "step": 14940
    },
    {
      "epoch": 0.47523745989389754,
      "grad_norm": 1.0513384342193604,
      "learning_rate": 0.00011297865971147217,
      "loss": 0.5181,
      "step": 14960
    },
    {
      "epoch": 0.47587280409161664,
      "grad_norm": 0.8159809112548828,
      "learning_rate": 0.00011277459616121851,
      "loss": 0.5368,
      "step": 14980
    },
    {
      "epoch": 0.47650814828933574,
      "grad_norm": 1.0844529867172241,
      "learning_rate": 0.00011257047851817748,
      "loss": 0.5497,
      "step": 15000
    },
    {
      "epoch": 0.47650814828933574,
      "eval_loss": 0.4893677234649658,
      "eval_runtime": 45.7511,
      "eval_samples_per_second": 59.081,
      "eval_steps_per_second": 29.551,
      "step": 15000
    },
    {
      "epoch": 0.47714349248705484,
      "grad_norm": 0.7700105309486389,
      "learning_rate": 0.0001123663076466655,
      "loss": 0.5354,
      "step": 15020
    },
    {
      "epoch": 0.477778836684774,
      "grad_norm": 0.872631847858429,
      "learning_rate": 0.0001121620844112242,
      "loss": 0.5243,
      "step": 15040
    },
    {
      "epoch": 0.4784141808824931,
      "grad_norm": 1.1037932634353638,
      "learning_rate": 0.0001119578096766171,
      "loss": 0.5412,
      "step": 15060
    },
    {
      "epoch": 0.4790495250802122,
      "grad_norm": 0.9620169997215271,
      "learning_rate": 0.00011175348430782579,
      "loss": 0.5137,
      "step": 15080
    },
    {
      "epoch": 0.4796848692779313,
      "grad_norm": 0.7465859055519104,
      "learning_rate": 0.0001115491091700461,
      "loss": 0.5213,
      "step": 15100
    },
    {
      "epoch": 0.48032021347565046,
      "grad_norm": 0.7287941575050354,
      "learning_rate": 0.00011134468512868479,
      "loss": 0.5184,
      "step": 15120
    },
    {
      "epoch": 0.48095555767336956,
      "grad_norm": 0.9596436023712158,
      "learning_rate": 0.00011114021304935558,
      "loss": 0.5471,
      "step": 15140
    },
    {
      "epoch": 0.48159090187108866,
      "grad_norm": 0.869172215461731,
      "learning_rate": 0.00011093569379787563,
      "loss": 0.5074,
      "step": 15160
    },
    {
      "epoch": 0.48222624606880776,
      "grad_norm": 1.0704097747802734,
      "learning_rate": 0.00011073112824026191,
      "loss": 0.544,
      "step": 15180
    },
    {
      "epoch": 0.48286159026652686,
      "grad_norm": 0.896312415599823,
      "learning_rate": 0.00011052651724272736,
      "loss": 0.5261,
      "step": 15200
    },
    {
      "epoch": 0.483496934464246,
      "grad_norm": 1.010606288909912,
      "learning_rate": 0.00011032186167167741,
      "loss": 0.5112,
      "step": 15220
    },
    {
      "epoch": 0.4841322786619651,
      "grad_norm": 0.980171263217926,
      "learning_rate": 0.00011011716239370625,
      "loss": 0.5414,
      "step": 15240
    },
    {
      "epoch": 0.4847676228596842,
      "grad_norm": 0.7417489290237427,
      "learning_rate": 0.00010991242027559301,
      "loss": 0.5019,
      "step": 15260
    },
    {
      "epoch": 0.4854029670574033,
      "grad_norm": 0.9232955574989319,
      "learning_rate": 0.0001097076361842984,
      "loss": 0.5293,
      "step": 15280
    },
    {
      "epoch": 0.4860383112551225,
      "grad_norm": 0.8391673564910889,
      "learning_rate": 0.00010950281098696072,
      "loss": 0.5397,
      "step": 15300
    },
    {
      "epoch": 0.4866736554528416,
      "grad_norm": 1.0795869827270508,
      "learning_rate": 0.00010929794555089239,
      "loss": 0.5293,
      "step": 15320
    },
    {
      "epoch": 0.4873089996505607,
      "grad_norm": 0.9179370403289795,
      "learning_rate": 0.00010909304074357627,
      "loss": 0.5089,
      "step": 15340
    },
    {
      "epoch": 0.4879443438482798,
      "grad_norm": 0.9346722960472107,
      "learning_rate": 0.0001088880974326618,
      "loss": 0.4981,
      "step": 15360
    },
    {
      "epoch": 0.48857968804599894,
      "grad_norm": 0.9835326075553894,
      "learning_rate": 0.00010868311648596157,
      "loss": 0.52,
      "step": 15380
    },
    {
      "epoch": 0.48921503224371804,
      "grad_norm": 0.8709509968757629,
      "learning_rate": 0.0001084780987714475,
      "loss": 0.5507,
      "step": 15400
    },
    {
      "epoch": 0.48985037644143714,
      "grad_norm": 1.0125563144683838,
      "learning_rate": 0.00010827304515724719,
      "loss": 0.5522,
      "step": 15420
    },
    {
      "epoch": 0.49048572063915624,
      "grad_norm": 0.9726683497428894,
      "learning_rate": 0.00010806795651164026,
      "loss": 0.5195,
      "step": 15440
    },
    {
      "epoch": 0.4911210648368754,
      "grad_norm": 0.9348143935203552,
      "learning_rate": 0.0001078628337030547,
      "loss": 0.5376,
      "step": 15460
    },
    {
      "epoch": 0.4917564090345945,
      "grad_norm": 1.247452735900879,
      "learning_rate": 0.00010765767760006308,
      "loss": 0.5238,
      "step": 15480
    },
    {
      "epoch": 0.4923917532323136,
      "grad_norm": 1.2584036588668823,
      "learning_rate": 0.00010745248907137906,
      "loss": 0.539,
      "step": 15500
    },
    {
      "epoch": 0.4930270974300327,
      "grad_norm": 0.9565659165382385,
      "learning_rate": 0.00010724726898585353,
      "loss": 0.546,
      "step": 15520
    },
    {
      "epoch": 0.49366244162775186,
      "grad_norm": 0.9646620750427246,
      "learning_rate": 0.000107042018212471,
      "loss": 0.5094,
      "step": 15540
    },
    {
      "epoch": 0.49429778582547096,
      "grad_norm": 0.7045026421546936,
      "learning_rate": 0.00010683673762034594,
      "loss": 0.5708,
      "step": 15560
    },
    {
      "epoch": 0.49493313002319006,
      "grad_norm": 1.1588184833526611,
      "learning_rate": 0.00010663142807871911,
      "loss": 0.5681,
      "step": 15580
    },
    {
      "epoch": 0.49556847422090916,
      "grad_norm": 0.8272905349731445,
      "learning_rate": 0.00010642609045695382,
      "loss": 0.5239,
      "step": 15600
    },
    {
      "epoch": 0.4962038184186283,
      "grad_norm": 0.9670738577842712,
      "learning_rate": 0.00010622072562453234,
      "loss": 0.486,
      "step": 15620
    },
    {
      "epoch": 0.4968391626163474,
      "grad_norm": 0.8635004162788391,
      "learning_rate": 0.00010601533445105205,
      "loss": 0.5419,
      "step": 15640
    },
    {
      "epoch": 0.4974745068140665,
      "grad_norm": 1.0769212245941162,
      "learning_rate": 0.00010580991780622196,
      "loss": 0.5252,
      "step": 15660
    },
    {
      "epoch": 0.4981098510117856,
      "grad_norm": 0.9688665270805359,
      "learning_rate": 0.00010560447655985894,
      "loss": 0.5559,
      "step": 15680
    },
    {
      "epoch": 0.4987451952095048,
      "grad_norm": 0.9587375521659851,
      "learning_rate": 0.00010539901158188398,
      "loss": 0.5136,
      "step": 15700
    },
    {
      "epoch": 0.4993805394072239,
      "grad_norm": 0.870891273021698,
      "learning_rate": 0.0001051935237423186,
      "loss": 0.5274,
      "step": 15720
    },
    {
      "epoch": 0.500015883604943,
      "grad_norm": 1.1741816997528076,
      "learning_rate": 0.00010498801391128108,
      "loss": 0.5274,
      "step": 15740
    },
    {
      "epoch": 0.5006512278026621,
      "grad_norm": 1.074429988861084,
      "learning_rate": 0.00010478248295898285,
      "loss": 0.5049,
      "step": 15760
    },
    {
      "epoch": 0.5012865720003812,
      "grad_norm": 0.7894431352615356,
      "learning_rate": 0.00010457693175572483,
      "loss": 0.5141,
      "step": 15780
    },
    {
      "epoch": 0.5019219161981003,
      "grad_norm": 0.8638029098510742,
      "learning_rate": 0.00010437136117189356,
      "loss": 0.5053,
      "step": 15800
    },
    {
      "epoch": 0.5025572603958194,
      "grad_norm": 0.9749894142150879,
      "learning_rate": 0.00010416577207795776,
      "loss": 0.5319,
      "step": 15820
    },
    {
      "epoch": 0.5031926045935385,
      "grad_norm": 0.9491709470748901,
      "learning_rate": 0.00010396016534446451,
      "loss": 0.4968,
      "step": 15840
    },
    {
      "epoch": 0.5038279487912577,
      "grad_norm": 0.880732536315918,
      "learning_rate": 0.00010375454184203555,
      "loss": 0.5292,
      "step": 15860
    },
    {
      "epoch": 0.5044632929889767,
      "grad_norm": 1.22807776927948,
      "learning_rate": 0.00010354890244136361,
      "loss": 0.5228,
      "step": 15880
    },
    {
      "epoch": 0.5050986371866959,
      "grad_norm": 0.8567366003990173,
      "learning_rate": 0.00010334324801320881,
      "loss": 0.558,
      "step": 15900
    },
    {
      "epoch": 0.505733981384415,
      "grad_norm": 0.8203198909759521,
      "learning_rate": 0.00010313757942839482,
      "loss": 0.5061,
      "step": 15920
    },
    {
      "epoch": 0.5063693255821341,
      "grad_norm": 0.9894897937774658,
      "learning_rate": 0.00010293189755780535,
      "loss": 0.5322,
      "step": 15940
    },
    {
      "epoch": 0.5070046697798533,
      "grad_norm": 1.0645695924758911,
      "learning_rate": 0.0001027262032723803,
      "loss": 0.536,
      "step": 15960
    },
    {
      "epoch": 0.5076400139775723,
      "grad_norm": 0.9940254092216492,
      "learning_rate": 0.0001025204974431121,
      "loss": 0.5211,
      "step": 15980
    },
    {
      "epoch": 0.5082753581752915,
      "grad_norm": 0.7856065630912781,
      "learning_rate": 0.00010231478094104216,
      "loss": 0.5137,
      "step": 16000
    },
    {
      "epoch": 0.5082753581752915,
      "eval_loss": 0.48191481828689575,
      "eval_runtime": 44.2211,
      "eval_samples_per_second": 61.125,
      "eval_steps_per_second": 30.574,
      "step": 16000
    },
    {
      "epoch": 0.5089107023730106,
      "grad_norm": 0.9363443851470947,
      "learning_rate": 0.00010210905463725703,
      "loss": 0.5426,
      "step": 16020
    },
    {
      "epoch": 0.5095460465707297,
      "grad_norm": 0.8720065355300903,
      "learning_rate": 0.0001019033194028848,
      "loss": 0.525,
      "step": 16040
    },
    {
      "epoch": 0.5101813907684488,
      "grad_norm": 0.9192999005317688,
      "learning_rate": 0.00010169757610909131,
      "loss": 0.5265,
      "step": 16060
    },
    {
      "epoch": 0.510816734966168,
      "grad_norm": 1.089529037475586,
      "learning_rate": 0.00010149182562707657,
      "loss": 0.5148,
      "step": 16080
    },
    {
      "epoch": 0.511452079163887,
      "grad_norm": 0.8161883354187012,
      "learning_rate": 0.00010128606882807106,
      "loss": 0.5441,
      "step": 16100
    },
    {
      "epoch": 0.5120874233616062,
      "grad_norm": 0.8635348081588745,
      "learning_rate": 0.00010108030658333192,
      "loss": 0.4981,
      "step": 16120
    },
    {
      "epoch": 0.5127227675593252,
      "grad_norm": 0.9366866946220398,
      "learning_rate": 0.00010087453976413943,
      "loss": 0.5155,
      "step": 16140
    },
    {
      "epoch": 0.5133581117570444,
      "grad_norm": 0.8161008954048157,
      "learning_rate": 0.00010066876924179321,
      "loss": 0.5178,
      "step": 16160
    },
    {
      "epoch": 0.5139934559547635,
      "grad_norm": 1.2926280498504639,
      "learning_rate": 0.00010046299588760855,
      "loss": 0.5409,
      "step": 16180
    },
    {
      "epoch": 0.5146288001524826,
      "grad_norm": 0.9963902235031128,
      "learning_rate": 0.00010025722057291273,
      "loss": 0.514,
      "step": 16200
    },
    {
      "epoch": 0.5152641443502017,
      "grad_norm": 0.7572094202041626,
      "learning_rate": 0.0001000514441690414,
      "loss": 0.5142,
      "step": 16220
    },
    {
      "epoch": 0.5158994885479208,
      "grad_norm": 0.7842695713043213,
      "learning_rate": 9.984566754733471e-05,
      "loss": 0.5419,
      "step": 16240
    },
    {
      "epoch": 0.5165348327456399,
      "grad_norm": 0.8259790539741516,
      "learning_rate": 9.96398915791338e-05,
      "loss": 0.5053,
      "step": 16260
    },
    {
      "epoch": 0.5171701769433591,
      "grad_norm": 0.7848758697509766,
      "learning_rate": 9.943411713577707e-05,
      "loss": 0.5129,
      "step": 16280
    },
    {
      "epoch": 0.5178055211410781,
      "grad_norm": 0.9001737236976624,
      "learning_rate": 9.922834508859636e-05,
      "loss": 0.5095,
      "step": 16300
    },
    {
      "epoch": 0.5184408653387973,
      "grad_norm": 1.2547895908355713,
      "learning_rate": 9.90225763089135e-05,
      "loss": 0.5402,
      "step": 16320
    },
    {
      "epoch": 0.5190762095365165,
      "grad_norm": 1.0412747859954834,
      "learning_rate": 9.881681166803634e-05,
      "loss": 0.5039,
      "step": 16340
    },
    {
      "epoch": 0.5197115537342355,
      "grad_norm": 0.8408613204956055,
      "learning_rate": 9.861105203725533e-05,
      "loss": 0.5256,
      "step": 16360
    },
    {
      "epoch": 0.5203468979319547,
      "grad_norm": 0.7325016856193542,
      "learning_rate": 9.840529828783965e-05,
      "loss": 0.5055,
      "step": 16380
    },
    {
      "epoch": 0.5209822421296737,
      "grad_norm": 1.3417218923568726,
      "learning_rate": 9.819955129103355e-05,
      "loss": 0.5336,
      "step": 16400
    },
    {
      "epoch": 0.5216175863273929,
      "grad_norm": 0.8016658425331116,
      "learning_rate": 9.799381191805272e-05,
      "loss": 0.5285,
      "step": 16420
    },
    {
      "epoch": 0.522252930525112,
      "grad_norm": 0.7678484916687012,
      "learning_rate": 9.778808104008059e-05,
      "loss": 0.5243,
      "step": 16440
    },
    {
      "epoch": 0.5228882747228311,
      "grad_norm": 1.0348572731018066,
      "learning_rate": 9.760293123314227e-05,
      "loss": 0.5305,
      "step": 16460
    },
    {
      "epoch": 0.5235236189205502,
      "grad_norm": 0.891635537147522,
      "learning_rate": 9.739721889566509e-05,
      "loss": 0.5258,
      "step": 16480
    },
    {
      "epoch": 0.5241589631182694,
      "grad_norm": 0.9525818824768066,
      "learning_rate": 9.719151757941184e-05,
      "loss": 0.5405,
      "step": 16500
    },
    {
      "epoch": 0.5247943073159884,
      "grad_norm": 0.8067079186439514,
      "learning_rate": 9.698582815540476e-05,
      "loss": 0.5058,
      "step": 16520
    },
    {
      "epoch": 0.5254296515137076,
      "grad_norm": 0.8525674939155579,
      "learning_rate": 9.678015149461577e-05,
      "loss": 0.5429,
      "step": 16540
    },
    {
      "epoch": 0.5260649957114266,
      "grad_norm": 0.9794461727142334,
      "learning_rate": 9.65744884679627e-05,
      "loss": 0.5106,
      "step": 16560
    },
    {
      "epoch": 0.5267003399091458,
      "grad_norm": 0.8107161521911621,
      "learning_rate": 9.636883994630567e-05,
      "loss": 0.5124,
      "step": 16580
    },
    {
      "epoch": 0.5273356841068649,
      "grad_norm": 0.8728024959564209,
      "learning_rate": 9.61632068004434e-05,
      "loss": 0.5483,
      "step": 16600
    },
    {
      "epoch": 0.527971028304584,
      "grad_norm": 1.0132850408554077,
      "learning_rate": 9.595758990110948e-05,
      "loss": 0.55,
      "step": 16620
    },
    {
      "epoch": 0.5286063725023031,
      "grad_norm": 1.0854065418243408,
      "learning_rate": 9.575199011896869e-05,
      "loss": 0.5022,
      "step": 16640
    },
    {
      "epoch": 0.5292417167000223,
      "grad_norm": 1.06479012966156,
      "learning_rate": 9.555668697368233e-05,
      "loss": 0.4932,
      "step": 16660
    },
    {
      "epoch": 0.5298770608977413,
      "grad_norm": 1.1619220972061157,
      "learning_rate": 9.535112307403999e-05,
      "loss": 0.5377,
      "step": 16680
    },
    {
      "epoch": 0.5305124050954605,
      "grad_norm": 1.1277661323547363,
      "learning_rate": 9.514557885961573e-05,
      "loss": 0.5267,
      "step": 16700
    },
    {
      "epoch": 0.5311477492931795,
      "grad_norm": 1.0196537971496582,
      "learning_rate": 9.494005520076655e-05,
      "loss": 0.5203,
      "step": 16720
    },
    {
      "epoch": 0.5317830934908987,
      "grad_norm": 0.9534218907356262,
      "learning_rate": 9.473455296776239e-05,
      "loss": 0.5177,
      "step": 16740
    },
    {
      "epoch": 0.5324184376886179,
      "grad_norm": 0.9330717325210571,
      "learning_rate": 9.45290730307826e-05,
      "loss": 0.55,
      "step": 16760
    },
    {
      "epoch": 0.5330537818863369,
      "grad_norm": 0.9290218949317932,
      "learning_rate": 9.43236162599119e-05,
      "loss": 0.5301,
      "step": 16780
    },
    {
      "epoch": 0.533689126084056,
      "grad_norm": 0.9842971563339233,
      "learning_rate": 9.411818352513715e-05,
      "loss": 0.4928,
      "step": 16800
    },
    {
      "epoch": 0.5343244702817751,
      "grad_norm": 0.9267326593399048,
      "learning_rate": 9.391277569634329e-05,
      "loss": 0.5443,
      "step": 16820
    },
    {
      "epoch": 0.5349598144794943,
      "grad_norm": 0.9270855784416199,
      "learning_rate": 9.370739364330982e-05,
      "loss": 0.5132,
      "step": 16840
    },
    {
      "epoch": 0.5355951586772134,
      "grad_norm": 0.9786942601203918,
      "learning_rate": 9.35020382357071e-05,
      "loss": 0.5229,
      "step": 16860
    },
    {
      "epoch": 0.5362305028749325,
      "grad_norm": 0.8397322297096252,
      "learning_rate": 9.329671034309269e-05,
      "loss": 0.5248,
      "step": 16880
    },
    {
      "epoch": 0.5368658470726516,
      "grad_norm": 0.9696868062019348,
      "learning_rate": 9.30914108349076e-05,
      "loss": 0.5635,
      "step": 16900
    },
    {
      "epoch": 0.5375011912703708,
      "grad_norm": 1.1376127004623413,
      "learning_rate": 9.28861405804727e-05,
      "loss": 0.548,
      "step": 16920
    },
    {
      "epoch": 0.5381365354680898,
      "grad_norm": 0.9028751254081726,
      "learning_rate": 9.268090044898489e-05,
      "loss": 0.5253,
      "step": 16940
    },
    {
      "epoch": 0.538771879665809,
      "grad_norm": 0.7549586296081543,
      "learning_rate": 9.247569130951365e-05,
      "loss": 0.5119,
      "step": 16960
    },
    {
      "epoch": 0.539407223863528,
      "grad_norm": 1.002920150756836,
      "learning_rate": 9.227051403099715e-05,
      "loss": 0.5383,
      "step": 16980
    },
    {
      "epoch": 0.5400425680612472,
      "grad_norm": 0.7857794761657715,
      "learning_rate": 9.206536948223862e-05,
      "loss": 0.4943,
      "step": 17000
    },
    {
      "epoch": 0.5400425680612472,
      "eval_loss": 0.47516322135925293,
      "eval_runtime": 44.9681,
      "eval_samples_per_second": 60.109,
      "eval_steps_per_second": 30.066,
      "step": 17000
    },
    {
      "epoch": 0.5406779122589663,
      "grad_norm": 0.8384699821472168,
      "learning_rate": 9.186025853190276e-05,
      "loss": 0.5005,
      "step": 17020
    },
    {
      "epoch": 0.5413132564566854,
      "grad_norm": 0.859467089176178,
      "learning_rate": 9.1655182048512e-05,
      "loss": 0.486,
      "step": 17040
    },
    {
      "epoch": 0.5419486006544045,
      "grad_norm": 0.9178836345672607,
      "learning_rate": 9.145014090044276e-05,
      "loss": 0.4866,
      "step": 17060
    },
    {
      "epoch": 0.5425839448521237,
      "grad_norm": 1.5116227865219116,
      "learning_rate": 9.12451359559219e-05,
      "loss": 0.5103,
      "step": 17080
    },
    {
      "epoch": 0.5432192890498427,
      "grad_norm": 0.8251123428344727,
      "learning_rate": 9.104016808302297e-05,
      "loss": 0.5403,
      "step": 17100
    },
    {
      "epoch": 0.5438546332475619,
      "grad_norm": 0.8845348358154297,
      "learning_rate": 9.08352381496625e-05,
      "loss": 0.5295,
      "step": 17120
    },
    {
      "epoch": 0.5444899774452809,
      "grad_norm": 0.8761606812477112,
      "learning_rate": 9.063034702359643e-05,
      "loss": 0.5175,
      "step": 17140
    },
    {
      "epoch": 0.5451253216430001,
      "grad_norm": 0.8992062211036682,
      "learning_rate": 9.042549557241629e-05,
      "loss": 0.5211,
      "step": 17160
    },
    {
      "epoch": 0.5457606658407193,
      "grad_norm": 1.0609464645385742,
      "learning_rate": 9.022068466354573e-05,
      "loss": 0.5231,
      "step": 17180
    },
    {
      "epoch": 0.5463960100384383,
      "grad_norm": 1.1660939455032349,
      "learning_rate": 9.001591516423664e-05,
      "loss": 0.5097,
      "step": 17200
    },
    {
      "epoch": 0.5470313542361575,
      "grad_norm": 0.8982824683189392,
      "learning_rate": 8.981118794156556e-05,
      "loss": 0.499,
      "step": 17220
    },
    {
      "epoch": 0.5476666984338765,
      "grad_norm": 0.9423658847808838,
      "learning_rate": 8.960650386243009e-05,
      "loss": 0.5023,
      "step": 17240
    },
    {
      "epoch": 0.5483020426315957,
      "grad_norm": 0.781741738319397,
      "learning_rate": 8.940186379354505e-05,
      "loss": 0.5098,
      "step": 17260
    },
    {
      "epoch": 0.5489373868293148,
      "grad_norm": 0.9678505063056946,
      "learning_rate": 8.919726860143895e-05,
      "loss": 0.5005,
      "step": 17280
    },
    {
      "epoch": 0.5495727310270339,
      "grad_norm": 0.9400302171707153,
      "learning_rate": 8.899271915245028e-05,
      "loss": 0.537,
      "step": 17300
    },
    {
      "epoch": 0.550208075224753,
      "grad_norm": 0.8072425127029419,
      "learning_rate": 8.878821631272384e-05,
      "loss": 0.5073,
      "step": 17320
    },
    {
      "epoch": 0.5508434194224722,
      "grad_norm": 0.9000498652458191,
      "learning_rate": 8.858376094820701e-05,
      "loss": 0.5014,
      "step": 17340
    },
    {
      "epoch": 0.5514787636201912,
      "grad_norm": 0.9222893118858337,
      "learning_rate": 8.837935392464621e-05,
      "loss": 0.5216,
      "step": 17360
    },
    {
      "epoch": 0.5521141078179104,
      "grad_norm": 0.8468360304832458,
      "learning_rate": 8.817499610758316e-05,
      "loss": 0.5282,
      "step": 17380
    },
    {
      "epoch": 0.5527494520156294,
      "grad_norm": 0.7120311260223389,
      "learning_rate": 8.797068836235116e-05,
      "loss": 0.5277,
      "step": 17400
    },
    {
      "epoch": 0.5533847962133486,
      "grad_norm": 0.880155622959137,
      "learning_rate": 8.776643155407154e-05,
      "loss": 0.523,
      "step": 17420
    },
    {
      "epoch": 0.5540201404110677,
      "grad_norm": 1.023587703704834,
      "learning_rate": 8.756222654764996e-05,
      "loss": 0.508,
      "step": 17440
    },
    {
      "epoch": 0.5546554846087868,
      "grad_norm": 0.8903362154960632,
      "learning_rate": 8.735807420777262e-05,
      "loss": 0.5165,
      "step": 17460
    },
    {
      "epoch": 0.5552908288065059,
      "grad_norm": 0.7317694425582886,
      "learning_rate": 8.715397539890287e-05,
      "loss": 0.4672,
      "step": 17480
    },
    {
      "epoch": 0.5559261730042251,
      "grad_norm": 1.0228464603424072,
      "learning_rate": 8.694993098527723e-05,
      "loss": 0.5112,
      "step": 17500
    },
    {
      "epoch": 0.5565615172019441,
      "grad_norm": 0.7797629237174988,
      "learning_rate": 8.674594183090199e-05,
      "loss": 0.477,
      "step": 17520
    },
    {
      "epoch": 0.5571968613996633,
      "grad_norm": 0.8488342761993408,
      "learning_rate": 8.654200879954945e-05,
      "loss": 0.4993,
      "step": 17540
    },
    {
      "epoch": 0.5578322055973823,
      "grad_norm": 0.8529194593429565,
      "learning_rate": 8.63381327547542e-05,
      "loss": 0.5293,
      "step": 17560
    },
    {
      "epoch": 0.5584675497951015,
      "grad_norm": 0.9537157416343689,
      "learning_rate": 8.613431455980955e-05,
      "loss": 0.5047,
      "step": 17580
    },
    {
      "epoch": 0.5591028939928206,
      "grad_norm": 0.8697558045387268,
      "learning_rate": 8.593055507776393e-05,
      "loss": 0.5293,
      "step": 17600
    },
    {
      "epoch": 0.5597382381905397,
      "grad_norm": 0.8306463360786438,
      "learning_rate": 8.5726855171417e-05,
      "loss": 0.5075,
      "step": 17620
    },
    {
      "epoch": 0.5603735823882589,
      "grad_norm": 0.8880159258842468,
      "learning_rate": 8.55232157033163e-05,
      "loss": 0.5149,
      "step": 17640
    },
    {
      "epoch": 0.561008926585978,
      "grad_norm": 0.9390746355056763,
      "learning_rate": 8.531963753575334e-05,
      "loss": 0.5196,
      "step": 17660
    },
    {
      "epoch": 0.561644270783697,
      "grad_norm": 0.968285322189331,
      "learning_rate": 8.511612153076015e-05,
      "loss": 0.5229,
      "step": 17680
    },
    {
      "epoch": 0.5622796149814162,
      "grad_norm": 0.9114767909049988,
      "learning_rate": 8.491266855010548e-05,
      "loss": 0.5008,
      "step": 17700
    },
    {
      "epoch": 0.5629149591791353,
      "grad_norm": 0.9089644551277161,
      "learning_rate": 8.470927945529123e-05,
      "loss": 0.4848,
      "step": 17720
    },
    {
      "epoch": 0.5635503033768544,
      "grad_norm": 0.7264979481697083,
      "learning_rate": 8.450595510754877e-05,
      "loss": 0.5155,
      "step": 17740
    },
    {
      "epoch": 0.5641856475745736,
      "grad_norm": 0.9070448875427246,
      "learning_rate": 8.430269636783534e-05,
      "loss": 0.524,
      "step": 17760
    },
    {
      "epoch": 0.5648209917722926,
      "grad_norm": 0.9725968241691589,
      "learning_rate": 8.40995040968303e-05,
      "loss": 0.4925,
      "step": 17780
    },
    {
      "epoch": 0.5654563359700118,
      "grad_norm": 0.8976007103919983,
      "learning_rate": 8.389637915493162e-05,
      "loss": 0.4937,
      "step": 17800
    },
    {
      "epoch": 0.5660916801677308,
      "grad_norm": 0.9926420450210571,
      "learning_rate": 8.369332240225214e-05,
      "loss": 0.5181,
      "step": 17820
    },
    {
      "epoch": 0.56672702436545,
      "grad_norm": 0.852676272392273,
      "learning_rate": 8.349033469861598e-05,
      "loss": 0.5175,
      "step": 17840
    },
    {
      "epoch": 0.5673623685631691,
      "grad_norm": 0.8739320635795593,
      "learning_rate": 8.328741690355487e-05,
      "loss": 0.4805,
      "step": 17860
    },
    {
      "epoch": 0.5679977127608882,
      "grad_norm": 0.9660511016845703,
      "learning_rate": 8.308456987630449e-05,
      "loss": 0.5063,
      "step": 17880
    },
    {
      "epoch": 0.5686330569586073,
      "grad_norm": 0.9321526288986206,
      "learning_rate": 8.288179447580088e-05,
      "loss": 0.4994,
      "step": 17900
    },
    {
      "epoch": 0.5692684011563265,
      "grad_norm": 1.0359587669372559,
      "learning_rate": 8.267909156067685e-05,
      "loss": 0.5279,
      "step": 17920
    },
    {
      "epoch": 0.5699037453540455,
      "grad_norm": 0.9722701907157898,
      "learning_rate": 8.247646198925813e-05,
      "loss": 0.5061,
      "step": 17940
    },
    {
      "epoch": 0.5705390895517647,
      "grad_norm": 0.854860782623291,
      "learning_rate": 8.227390661956006e-05,
      "loss": 0.4827,
      "step": 17960
    },
    {
      "epoch": 0.5711744337494837,
      "grad_norm": 0.8997724652290344,
      "learning_rate": 8.207142630928362e-05,
      "loss": 0.4978,
      "step": 17980
    },
    {
      "epoch": 0.5718097779472029,
      "grad_norm": 0.9234896898269653,
      "learning_rate": 8.186902191581205e-05,
      "loss": 0.4982,
      "step": 18000
    },
    {
      "epoch": 0.5718097779472029,
      "eval_loss": 0.469827800989151,
      "eval_runtime": 44.8258,
      "eval_samples_per_second": 60.3,
      "eval_steps_per_second": 30.161,
      "step": 18000
    },
    {
      "epoch": 0.572445122144922,
      "grad_norm": 0.8457797169685364,
      "learning_rate": 8.166669429620712e-05,
      "loss": 0.5263,
      "step": 18020
    },
    {
      "epoch": 0.5730804663426411,
      "grad_norm": 0.8909218907356262,
      "learning_rate": 8.146444430720545e-05,
      "loss": 0.5045,
      "step": 18040
    },
    {
      "epoch": 0.5737158105403602,
      "grad_norm": 0.950072705745697,
      "learning_rate": 8.126227280521503e-05,
      "loss": 0.5247,
      "step": 18060
    },
    {
      "epoch": 0.5743511547380794,
      "grad_norm": 0.9507225751876831,
      "learning_rate": 8.106018064631148e-05,
      "loss": 0.4851,
      "step": 18080
    },
    {
      "epoch": 0.5749864989357985,
      "grad_norm": 1.0232789516448975,
      "learning_rate": 8.085816868623436e-05,
      "loss": 0.5457,
      "step": 18100
    },
    {
      "epoch": 0.5756218431335176,
      "grad_norm": 1.0967813730239868,
      "learning_rate": 8.065623778038377e-05,
      "loss": 0.52,
      "step": 18120
    },
    {
      "epoch": 0.5762571873312367,
      "grad_norm": 0.7866876125335693,
      "learning_rate": 8.045438878381649e-05,
      "loss": 0.5117,
      "step": 18140
    },
    {
      "epoch": 0.5768925315289558,
      "grad_norm": 0.9325518012046814,
      "learning_rate": 8.025262255124248e-05,
      "loss": 0.5415,
      "step": 18160
    },
    {
      "epoch": 0.577527875726675,
      "grad_norm": 0.8899424076080322,
      "learning_rate": 8.005093993702133e-05,
      "loss": 0.4947,
      "step": 18180
    },
    {
      "epoch": 0.578163219924394,
      "grad_norm": 1.0050842761993408,
      "learning_rate": 7.984934179515843e-05,
      "loss": 0.4863,
      "step": 18200
    },
    {
      "epoch": 0.5787985641221132,
      "grad_norm": 0.836564302444458,
      "learning_rate": 7.964782897930158e-05,
      "loss": 0.5055,
      "step": 18220
    },
    {
      "epoch": 0.5794339083198322,
      "grad_norm": 1.032029628753662,
      "learning_rate": 7.944640234273724e-05,
      "loss": 0.4919,
      "step": 18240
    },
    {
      "epoch": 0.5800692525175514,
      "grad_norm": 0.854015588760376,
      "learning_rate": 7.92450627383869e-05,
      "loss": 0.5108,
      "step": 18260
    },
    {
      "epoch": 0.5807045967152705,
      "grad_norm": 1.0629216432571411,
      "learning_rate": 7.904381101880364e-05,
      "loss": 0.5312,
      "step": 18280
    },
    {
      "epoch": 0.5813399409129896,
      "grad_norm": 0.8146398067474365,
      "learning_rate": 7.884264803616827e-05,
      "loss": 0.5203,
      "step": 18300
    },
    {
      "epoch": 0.5819752851107087,
      "grad_norm": 1.1307437419891357,
      "learning_rate": 7.864157464228593e-05,
      "loss": 0.5325,
      "step": 18320
    },
    {
      "epoch": 0.5826106293084279,
      "grad_norm": 0.9609930515289307,
      "learning_rate": 7.844059168858241e-05,
      "loss": 0.5034,
      "step": 18340
    },
    {
      "epoch": 0.5832459735061469,
      "grad_norm": 0.8615232110023499,
      "learning_rate": 7.823970002610048e-05,
      "loss": 0.522,
      "step": 18360
    },
    {
      "epoch": 0.5838813177038661,
      "grad_norm": 1.014160394668579,
      "learning_rate": 7.803890050549641e-05,
      "loss": 0.5104,
      "step": 18380
    },
    {
      "epoch": 0.5845166619015851,
      "grad_norm": 1.015424370765686,
      "learning_rate": 7.78381939770363e-05,
      "loss": 0.4887,
      "step": 18400
    },
    {
      "epoch": 0.5851520060993043,
      "grad_norm": 1.0072382688522339,
      "learning_rate": 7.763758129059243e-05,
      "loss": 0.5242,
      "step": 18420
    },
    {
      "epoch": 0.5857873502970234,
      "grad_norm": 1.122096300125122,
      "learning_rate": 7.743706329563971e-05,
      "loss": 0.5408,
      "step": 18440
    },
    {
      "epoch": 0.5864226944947425,
      "grad_norm": 0.8347269296646118,
      "learning_rate": 7.723664084125218e-05,
      "loss": 0.5112,
      "step": 18460
    },
    {
      "epoch": 0.5870580386924616,
      "grad_norm": 0.9214980006217957,
      "learning_rate": 7.703631477609926e-05,
      "loss": 0.5111,
      "step": 18480
    },
    {
      "epoch": 0.5876933828901808,
      "grad_norm": 0.8427157402038574,
      "learning_rate": 7.683608594844218e-05,
      "loss": 0.5199,
      "step": 18500
    },
    {
      "epoch": 0.5883287270878998,
      "grad_norm": 0.8485844731330872,
      "learning_rate": 7.663595520613054e-05,
      "loss": 0.5193,
      "step": 18520
    },
    {
      "epoch": 0.588964071285619,
      "grad_norm": 0.8761444687843323,
      "learning_rate": 7.643592339659848e-05,
      "loss": 0.5044,
      "step": 18540
    },
    {
      "epoch": 0.589599415483338,
      "grad_norm": 0.9373889565467834,
      "learning_rate": 7.623599136686133e-05,
      "loss": 0.493,
      "step": 18560
    },
    {
      "epoch": 0.5902347596810572,
      "grad_norm": 0.9052358269691467,
      "learning_rate": 7.603615996351184e-05,
      "loss": 0.516,
      "step": 18580
    },
    {
      "epoch": 0.5908701038787764,
      "grad_norm": 0.7757846117019653,
      "learning_rate": 7.583643003271668e-05,
      "loss": 0.5043,
      "step": 18600
    },
    {
      "epoch": 0.5915054480764954,
      "grad_norm": 0.7769386172294617,
      "learning_rate": 7.563680242021285e-05,
      "loss": 0.5005,
      "step": 18620
    },
    {
      "epoch": 0.5921407922742146,
      "grad_norm": 0.7892422080039978,
      "learning_rate": 7.543727797130413e-05,
      "loss": 0.4982,
      "step": 18640
    },
    {
      "epoch": 0.5927761364719337,
      "grad_norm": 1.0471646785736084,
      "learning_rate": 7.524782606964114e-05,
      "loss": 0.5139,
      "step": 18660
    },
    {
      "epoch": 0.5934114806696528,
      "grad_norm": 0.7995429039001465,
      "learning_rate": 7.504850521939017e-05,
      "loss": 0.4736,
      "step": 18680
    },
    {
      "epoch": 0.5940468248673719,
      "grad_norm": 0.9799679517745972,
      "learning_rate": 7.484929002382169e-05,
      "loss": 0.5033,
      "step": 18700
    },
    {
      "epoch": 0.594682169065091,
      "grad_norm": 0.8607106804847717,
      "learning_rate": 7.465018132649311e-05,
      "loss": 0.498,
      "step": 18720
    },
    {
      "epoch": 0.5953175132628101,
      "grad_norm": 0.9690695405006409,
      "learning_rate": 7.445117997051085e-05,
      "loss": 0.4898,
      "step": 18740
    },
    {
      "epoch": 0.5959528574605293,
      "grad_norm": 1.331871747970581,
      "learning_rate": 7.425228679852684e-05,
      "loss": 0.5044,
      "step": 18760
    },
    {
      "epoch": 0.5965882016582483,
      "grad_norm": 0.9347879886627197,
      "learning_rate": 7.405350265273492e-05,
      "loss": 0.5088,
      "step": 18780
    },
    {
      "epoch": 0.5972235458559675,
      "grad_norm": 0.8495462536811829,
      "learning_rate": 7.385482837486725e-05,
      "loss": 0.5078,
      "step": 18800
    },
    {
      "epoch": 0.5978588900536865,
      "grad_norm": 1.318202257156372,
      "learning_rate": 7.365626480619081e-05,
      "loss": 0.5014,
      "step": 18820
    },
    {
      "epoch": 0.5984942342514057,
      "grad_norm": 1.0349724292755127,
      "learning_rate": 7.345781278750368e-05,
      "loss": 0.531,
      "step": 18840
    },
    {
      "epoch": 0.5991295784491248,
      "grad_norm": 1.047760248184204,
      "learning_rate": 7.326938745831322e-05,
      "loss": 0.4925,
      "step": 18860
    },
    {
      "epoch": 0.5997649226468439,
      "grad_norm": 0.874220073223114,
      "learning_rate": 7.307115537865903e-05,
      "loss": 0.5056,
      "step": 18880
    },
    {
      "epoch": 0.600400266844563,
      "grad_norm": 0.738158106803894,
      "learning_rate": 7.287303732658328e-05,
      "loss": 0.4938,
      "step": 18900
    },
    {
      "epoch": 0.6010356110422822,
      "grad_norm": 0.8721213936805725,
      "learning_rate": 7.267503414099758e-05,
      "loss": 0.5074,
      "step": 18920
    },
    {
      "epoch": 0.6016709552400012,
      "grad_norm": 0.7241856455802917,
      "learning_rate": 7.247714666032724e-05,
      "loss": 0.5045,
      "step": 18940
    },
    {
      "epoch": 0.6023062994377204,
      "grad_norm": 1.0385938882827759,
      "learning_rate": 7.227937572250761e-05,
      "loss": 0.5313,
      "step": 18960
    },
    {
      "epoch": 0.6029416436354395,
      "grad_norm": 1.8555858135223389,
      "learning_rate": 7.208172216498046e-05,
      "loss": 0.4989,
      "step": 18980
    },
    {
      "epoch": 0.6035769878331586,
      "grad_norm": 0.9453182816505432,
      "learning_rate": 7.188418682469064e-05,
      "loss": 0.5146,
      "step": 19000
    },
    {
      "epoch": 0.6035769878331586,
      "eval_loss": 0.46334323287010193,
      "eval_runtime": 44.8428,
      "eval_samples_per_second": 60.277,
      "eval_steps_per_second": 30.15,
      "step": 19000
    },
    {
      "epoch": 0.6042123320308778,
      "grad_norm": 0.9362254738807678,
      "learning_rate": 7.168677053808237e-05,
      "loss": 0.5148,
      "step": 19020
    },
    {
      "epoch": 0.6048476762285968,
      "grad_norm": 1.19162917137146,
      "learning_rate": 7.148947414109572e-05,
      "loss": 0.4954,
      "step": 19040
    },
    {
      "epoch": 0.605483020426316,
      "grad_norm": 0.9854863286018372,
      "learning_rate": 7.129229846916318e-05,
      "loss": 0.5173,
      "step": 19060
    },
    {
      "epoch": 0.6061183646240351,
      "grad_norm": 0.8435449600219727,
      "learning_rate": 7.109524435720597e-05,
      "loss": 0.5154,
      "step": 19080
    },
    {
      "epoch": 0.6067537088217542,
      "grad_norm": 0.920364260673523,
      "learning_rate": 7.08983126396306e-05,
      "loss": 0.5092,
      "step": 19100
    },
    {
      "epoch": 0.6073890530194733,
      "grad_norm": 1.2439565658569336,
      "learning_rate": 7.070150415032527e-05,
      "loss": 0.511,
      "step": 19120
    },
    {
      "epoch": 0.6080243972171924,
      "grad_norm": 0.7429732084274292,
      "learning_rate": 7.050481972265648e-05,
      "loss": 0.4787,
      "step": 19140
    },
    {
      "epoch": 0.6086597414149115,
      "grad_norm": 0.6966003179550171,
      "learning_rate": 7.03082601894653e-05,
      "loss": 0.5237,
      "step": 19160
    },
    {
      "epoch": 0.6092950856126307,
      "grad_norm": 0.8211964964866638,
      "learning_rate": 7.011182638306402e-05,
      "loss": 0.5349,
      "step": 19180
    },
    {
      "epoch": 0.6099304298103497,
      "grad_norm": 0.9803711771965027,
      "learning_rate": 6.991551913523253e-05,
      "loss": 0.5369,
      "step": 19200
    },
    {
      "epoch": 0.6105657740080689,
      "grad_norm": 0.9161061644554138,
      "learning_rate": 6.971933927721479e-05,
      "loss": 0.4993,
      "step": 19220
    },
    {
      "epoch": 0.611201118205788,
      "grad_norm": 0.9608227014541626,
      "learning_rate": 6.952328763971537e-05,
      "loss": 0.4837,
      "step": 19240
    },
    {
      "epoch": 0.6118364624035071,
      "grad_norm": 0.9438381195068359,
      "learning_rate": 6.932736505289592e-05,
      "loss": 0.479,
      "step": 19260
    },
    {
      "epoch": 0.6124718066012262,
      "grad_norm": 1.571315884590149,
      "learning_rate": 6.91315723463716e-05,
      "loss": 0.5417,
      "step": 19280
    },
    {
      "epoch": 0.6131071507989453,
      "grad_norm": 0.8187804818153381,
      "learning_rate": 6.893591034920763e-05,
      "loss": 0.5189,
      "step": 19300
    },
    {
      "epoch": 0.6137424949966644,
      "grad_norm": 0.7617794871330261,
      "learning_rate": 6.87403798899157e-05,
      "loss": 0.468,
      "step": 19320
    },
    {
      "epoch": 0.6143778391943836,
      "grad_norm": 0.8723959922790527,
      "learning_rate": 6.85449817964506e-05,
      "loss": 0.5044,
      "step": 19340
    },
    {
      "epoch": 0.6150131833921026,
      "grad_norm": 0.7760429382324219,
      "learning_rate": 6.834971689620659e-05,
      "loss": 0.4922,
      "step": 19360
    },
    {
      "epoch": 0.6156485275898218,
      "grad_norm": 0.925581693649292,
      "learning_rate": 6.815458601601392e-05,
      "loss": 0.5079,
      "step": 19380
    },
    {
      "epoch": 0.6162838717875408,
      "grad_norm": 0.8069369792938232,
      "learning_rate": 6.795958998213535e-05,
      "loss": 0.4995,
      "step": 19400
    },
    {
      "epoch": 0.61691921598526,
      "grad_norm": 1.3501884937286377,
      "learning_rate": 6.77647296202627e-05,
      "loss": 0.4906,
      "step": 19420
    },
    {
      "epoch": 0.6175545601829792,
      "grad_norm": 0.9078099131584167,
      "learning_rate": 6.75700057555132e-05,
      "loss": 0.4983,
      "step": 19440
    },
    {
      "epoch": 0.6181899043806982,
      "grad_norm": 0.7792625427246094,
      "learning_rate": 6.737541921242619e-05,
      "loss": 0.4869,
      "step": 19460
    },
    {
      "epoch": 0.6188252485784174,
      "grad_norm": 0.8952593803405762,
      "learning_rate": 6.718097081495947e-05,
      "loss": 0.4975,
      "step": 19480
    },
    {
      "epoch": 0.6194605927761365,
      "grad_norm": 0.9192362427711487,
      "learning_rate": 6.698666138648593e-05,
      "loss": 0.5059,
      "step": 19500
    },
    {
      "epoch": 0.6200959369738556,
      "grad_norm": 0.8911659121513367,
      "learning_rate": 6.679249174978997e-05,
      "loss": 0.5014,
      "step": 19520
    },
    {
      "epoch": 0.6207312811715747,
      "grad_norm": 0.9853730201721191,
      "learning_rate": 6.659846272706406e-05,
      "loss": 0.4935,
      "step": 19540
    },
    {
      "epoch": 0.6213666253692938,
      "grad_norm": 1.3485686779022217,
      "learning_rate": 6.640457513990527e-05,
      "loss": 0.5061,
      "step": 19560
    },
    {
      "epoch": 0.6220019695670129,
      "grad_norm": 0.8757696747779846,
      "learning_rate": 6.621082980931179e-05,
      "loss": 0.4869,
      "step": 19580
    },
    {
      "epoch": 0.6226373137647321,
      "grad_norm": 1.0088223218917847,
      "learning_rate": 6.601722755567937e-05,
      "loss": 0.5138,
      "step": 19600
    },
    {
      "epoch": 0.6232726579624511,
      "grad_norm": 0.94034343957901,
      "learning_rate": 6.582376919879798e-05,
      "loss": 0.5159,
      "step": 19620
    },
    {
      "epoch": 0.6239080021601703,
      "grad_norm": 0.834994375705719,
      "learning_rate": 6.563045555784826e-05,
      "loss": 0.4862,
      "step": 19640
    },
    {
      "epoch": 0.6245433463578894,
      "grad_norm": 1.2617956399917603,
      "learning_rate": 6.543728745139802e-05,
      "loss": 0.5112,
      "step": 19660
    },
    {
      "epoch": 0.6251786905556085,
      "grad_norm": 0.8542491793632507,
      "learning_rate": 6.524426569739892e-05,
      "loss": 0.5234,
      "step": 19680
    },
    {
      "epoch": 0.6258140347533276,
      "grad_norm": 1.162606120109558,
      "learning_rate": 6.505139111318277e-05,
      "loss": 0.4772,
      "step": 19700
    },
    {
      "epoch": 0.6264493789510467,
      "grad_norm": 1.0025289058685303,
      "learning_rate": 6.48586645154583e-05,
      "loss": 0.5212,
      "step": 19720
    },
    {
      "epoch": 0.6270847231487658,
      "grad_norm": 1.0566537380218506,
      "learning_rate": 6.466608672030763e-05,
      "loss": 0.5556,
      "step": 19740
    },
    {
      "epoch": 0.627720067346485,
      "grad_norm": 1.0380536317825317,
      "learning_rate": 6.447365854318266e-05,
      "loss": 0.4827,
      "step": 19760
    },
    {
      "epoch": 0.628355411544204,
      "grad_norm": 1.0499038696289062,
      "learning_rate": 6.42813807989019e-05,
      "loss": 0.5316,
      "step": 19780
    },
    {
      "epoch": 0.6289907557419232,
      "grad_norm": 0.7457720637321472,
      "learning_rate": 6.408925430164669e-05,
      "loss": 0.5055,
      "step": 19800
    },
    {
      "epoch": 0.6296260999396422,
      "grad_norm": 1.2990676164627075,
      "learning_rate": 6.389727986495813e-05,
      "loss": 0.5068,
      "step": 19820
    },
    {
      "epoch": 0.6302614441373614,
      "grad_norm": 0.9500844478607178,
      "learning_rate": 6.370545830173332e-05,
      "loss": 0.4889,
      "step": 19840
    },
    {
      "epoch": 0.6308967883350806,
      "grad_norm": 0.7668824195861816,
      "learning_rate": 6.351379042422199e-05,
      "loss": 0.5314,
      "step": 19860
    },
    {
      "epoch": 0.6315321325327996,
      "grad_norm": 0.9457335472106934,
      "learning_rate": 6.332227704402321e-05,
      "loss": 0.4898,
      "step": 19880
    },
    {
      "epoch": 0.6321674767305188,
      "grad_norm": 0.8252271413803101,
      "learning_rate": 6.31309189720818e-05,
      "loss": 0.5045,
      "step": 19900
    },
    {
      "epoch": 0.6328028209282379,
      "grad_norm": 0.9943385720252991,
      "learning_rate": 6.29397170186849e-05,
      "loss": 0.5243,
      "step": 19920
    },
    {
      "epoch": 0.633438165125957,
      "grad_norm": 1.1582151651382446,
      "learning_rate": 6.27582205051849e-05,
      "loss": 0.5331,
      "step": 19940
    },
    {
      "epoch": 0.6340735093236761,
      "grad_norm": 0.9436770677566528,
      "learning_rate": 6.256732531103176e-05,
      "loss": 0.4903,
      "step": 19960
    },
    {
      "epoch": 0.6347088535213952,
      "grad_norm": 0.8253883123397827,
      "learning_rate": 6.237658862190583e-05,
      "loss": 0.4934,
      "step": 19980
    },
    {
      "epoch": 0.6353441977191143,
      "grad_norm": 0.8770557641983032,
      "learning_rate": 6.21860112454631e-05,
      "loss": 0.5202,
      "step": 20000
    },
    {
      "epoch": 0.6353441977191143,
      "eval_loss": 0.45828375220298767,
      "eval_runtime": 44.5614,
      "eval_samples_per_second": 60.658,
      "eval_steps_per_second": 30.34,
      "step": 20000
    },
    {
      "epoch": 0.6359795419168335,
      "grad_norm": 1.2218546867370605,
      "learning_rate": 6.19955939886849e-05,
      "loss": 0.5171,
      "step": 20020
    },
    {
      "epoch": 0.6366148861145525,
      "grad_norm": 0.8330618143081665,
      "learning_rate": 6.180533765787468e-05,
      "loss": 0.4863,
      "step": 20040
    },
    {
      "epoch": 0.6372502303122717,
      "grad_norm": 1.0419652462005615,
      "learning_rate": 6.162474393506114e-05,
      "loss": 0.5427,
      "step": 20060
    },
    {
      "epoch": 0.6378855745099908,
      "grad_norm": 0.9472757577896118,
      "learning_rate": 6.143480372643493e-05,
      "loss": 0.5245,
      "step": 20080
    },
    {
      "epoch": 0.6385209187077099,
      "grad_norm": 0.7603405117988586,
      "learning_rate": 6.12450268183886e-05,
      "loss": 0.4964,
      "step": 20100
    },
    {
      "epoch": 0.639156262905429,
      "grad_norm": 0.8776742219924927,
      "learning_rate": 6.105541401451404e-05,
      "loss": 0.4966,
      "step": 20120
    },
    {
      "epoch": 0.6397916071031481,
      "grad_norm": 0.8271143436431885,
      "learning_rate": 6.086596611770831e-05,
      "loss": 0.5119,
      "step": 20140
    },
    {
      "epoch": 0.6404269513008672,
      "grad_norm": 1.1509547233581543,
      "learning_rate": 6.067668393017007e-05,
      "loss": 0.5031,
      "step": 20160
    },
    {
      "epoch": 0.6410622954985864,
      "grad_norm": 0.8693366050720215,
      "learning_rate": 6.048756825339643e-05,
      "loss": 0.4986,
      "step": 20180
    },
    {
      "epoch": 0.6416976396963054,
      "grad_norm": 0.949834942817688,
      "learning_rate": 6.029861988817935e-05,
      "loss": 0.4921,
      "step": 20200
    },
    {
      "epoch": 0.6423329838940246,
      "grad_norm": 0.9004225730895996,
      "learning_rate": 6.010983963460233e-05,
      "loss": 0.5023,
      "step": 20220
    },
    {
      "epoch": 0.6429683280917438,
      "grad_norm": 0.7829142808914185,
      "learning_rate": 5.9921228292037026e-05,
      "loss": 0.507,
      "step": 20240
    },
    {
      "epoch": 0.6436036722894628,
      "grad_norm": 1.1816707849502563,
      "learning_rate": 5.973278665913985e-05,
      "loss": 0.4926,
      "step": 20260
    },
    {
      "epoch": 0.644239016487182,
      "grad_norm": 0.881648063659668,
      "learning_rate": 5.9544515533848614e-05,
      "loss": 0.4885,
      "step": 20280
    },
    {
      "epoch": 0.644874360684901,
      "grad_norm": 0.9568135738372803,
      "learning_rate": 5.9356415713379145e-05,
      "loss": 0.515,
      "step": 20300
    },
    {
      "epoch": 0.6455097048826202,
      "grad_norm": 0.9377472400665283,
      "learning_rate": 5.9168487994221834e-05,
      "loss": 0.4886,
      "step": 20320
    },
    {
      "epoch": 0.6461450490803393,
      "grad_norm": 0.9032811522483826,
      "learning_rate": 5.898073317213837e-05,
      "loss": 0.5064,
      "step": 20340
    },
    {
      "epoch": 0.6467803932780584,
      "grad_norm": 0.9788734316825867,
      "learning_rate": 5.879315204215836e-05,
      "loss": 0.4698,
      "step": 20360
    },
    {
      "epoch": 0.6474157374757775,
      "grad_norm": 1.0353432893753052,
      "learning_rate": 5.860574539857584e-05,
      "loss": 0.5227,
      "step": 20380
    },
    {
      "epoch": 0.6480510816734966,
      "grad_norm": 0.8998845815658569,
      "learning_rate": 5.84185140349461e-05,
      "loss": 0.5132,
      "step": 20400
    },
    {
      "epoch": 0.6486864258712157,
      "grad_norm": 0.8317026495933533,
      "learning_rate": 5.82314587440821e-05,
      "loss": 0.468,
      "step": 20420
    },
    {
      "epoch": 0.6493217700689349,
      "grad_norm": 0.7740748524665833,
      "learning_rate": 5.80445803180514e-05,
      "loss": 0.5119,
      "step": 20440
    },
    {
      "epoch": 0.6499571142666539,
      "grad_norm": 1.0922515392303467,
      "learning_rate": 5.78578795481725e-05,
      "loss": 0.5284,
      "step": 20460
    },
    {
      "epoch": 0.6505924584643731,
      "grad_norm": 0.8265649676322937,
      "learning_rate": 5.76713572250117e-05,
      "loss": 0.5095,
      "step": 20480
    },
    {
      "epoch": 0.6512278026620922,
      "grad_norm": 1.0644861459732056,
      "learning_rate": 5.748501413837963e-05,
      "loss": 0.5028,
      "step": 20500
    },
    {
      "epoch": 0.6518631468598113,
      "grad_norm": 0.9139828681945801,
      "learning_rate": 5.729885107732808e-05,
      "loss": 0.4814,
      "step": 20520
    },
    {
      "epoch": 0.6524984910575304,
      "grad_norm": 0.7917624115943909,
      "learning_rate": 5.7112868830146416e-05,
      "loss": 0.4772,
      "step": 20540
    },
    {
      "epoch": 0.6531338352552495,
      "grad_norm": 0.7677121162414551,
      "learning_rate": 5.692706818435836e-05,
      "loss": 0.519,
      "step": 20560
    },
    {
      "epoch": 0.6537691794529686,
      "grad_norm": 0.8412395715713501,
      "learning_rate": 5.674144992671882e-05,
      "loss": 0.501,
      "step": 20580
    },
    {
      "epoch": 0.6544045236506878,
      "grad_norm": 1.014061689376831,
      "learning_rate": 5.655601484321022e-05,
      "loss": 0.5122,
      "step": 20600
    },
    {
      "epoch": 0.6550398678484068,
      "grad_norm": 1.0746990442276,
      "learning_rate": 5.6370763719039375e-05,
      "loss": 0.4969,
      "step": 20620
    },
    {
      "epoch": 0.655675212046126,
      "grad_norm": 0.9021841883659363,
      "learning_rate": 5.6185697338634304e-05,
      "loss": 0.4771,
      "step": 20640
    },
    {
      "epoch": 0.6563105562438452,
      "grad_norm": 0.8193987607955933,
      "learning_rate": 5.600081648564056e-05,
      "loss": 0.5143,
      "step": 20660
    },
    {
      "epoch": 0.6569459004415642,
      "grad_norm": 1.152421474456787,
      "learning_rate": 5.581612194291814e-05,
      "loss": 0.4873,
      "step": 20680
    },
    {
      "epoch": 0.6575812446392834,
      "grad_norm": 0.8709347248077393,
      "learning_rate": 5.5631614492538217e-05,
      "loss": 0.5199,
      "step": 20700
    },
    {
      "epoch": 0.6582165888370024,
      "grad_norm": 0.827723503112793,
      "learning_rate": 5.544729491577967e-05,
      "loss": 0.4917,
      "step": 20720
    },
    {
      "epoch": 0.6588519330347216,
      "grad_norm": 1.5408345460891724,
      "learning_rate": 5.526316399312579e-05,
      "loss": 0.5562,
      "step": 20740
    },
    {
      "epoch": 0.6594872772324407,
      "grad_norm": 0.731490433216095,
      "learning_rate": 5.507922250426118e-05,
      "loss": 0.4927,
      "step": 20760
    },
    {
      "epoch": 0.6601226214301598,
      "grad_norm": 0.950702428817749,
      "learning_rate": 5.4895471228068185e-05,
      "loss": 0.5115,
      "step": 20780
    },
    {
      "epoch": 0.6607579656278789,
      "grad_norm": 0.8342424631118774,
      "learning_rate": 5.471191094262369e-05,
      "loss": 0.4856,
      "step": 20800
    },
    {
      "epoch": 0.661393309825598,
      "grad_norm": 0.9297844767570496,
      "learning_rate": 5.4528542425196004e-05,
      "loss": 0.4896,
      "step": 20820
    },
    {
      "epoch": 0.6620286540233171,
      "grad_norm": 0.7558259963989258,
      "learning_rate": 5.434536645224126e-05,
      "loss": 0.4895,
      "step": 20840
    },
    {
      "epoch": 0.6626639982210363,
      "grad_norm": 1.2116395235061646,
      "learning_rate": 5.416238379940035e-05,
      "loss": 0.507,
      "step": 20860
    },
    {
      "epoch": 0.6632993424187553,
      "grad_norm": 0.913467526435852,
      "learning_rate": 5.39795952414955e-05,
      "loss": 0.5137,
      "step": 20880
    },
    {
      "epoch": 0.6639346866164745,
      "grad_norm": 0.868238627910614,
      "learning_rate": 5.3797001552527184e-05,
      "loss": 0.5185,
      "step": 20900
    },
    {
      "epoch": 0.6645700308141936,
      "grad_norm": 1.0668286085128784,
      "learning_rate": 5.361460350567062e-05,
      "loss": 0.5158,
      "step": 20920
    },
    {
      "epoch": 0.6652053750119127,
      "grad_norm": 0.795097291469574,
      "learning_rate": 5.3432401873272655e-05,
      "loss": 0.4985,
      "step": 20940
    },
    {
      "epoch": 0.6658407192096318,
      "grad_norm": 0.6949301958084106,
      "learning_rate": 5.325039742684839e-05,
      "loss": 0.4722,
      "step": 20960
    },
    {
      "epoch": 0.6664760634073509,
      "grad_norm": 0.7859952449798584,
      "learning_rate": 5.3068590937077945e-05,
      "loss": 0.4933,
      "step": 20980
    },
    {
      "epoch": 0.66711140760507,
      "grad_norm": 0.8529000282287598,
      "learning_rate": 5.288698317380334e-05,
      "loss": 0.5098,
      "step": 21000
    },
    {
      "epoch": 0.66711140760507,
      "eval_loss": 0.45643100142478943,
      "eval_runtime": 44.6378,
      "eval_samples_per_second": 60.554,
      "eval_steps_per_second": 30.288,
      "step": 21000
    },
    {
      "epoch": 0.6677467518027892,
      "grad_norm": 0.9853639602661133,
      "learning_rate": 5.270557490602499e-05,
      "loss": 0.4715,
      "step": 21020
    },
    {
      "epoch": 0.6683820960005082,
      "grad_norm": 0.8387131690979004,
      "learning_rate": 5.2524366901898566e-05,
      "loss": 0.5128,
      "step": 21040
    },
    {
      "epoch": 0.6690174401982274,
      "grad_norm": 0.8610044717788696,
      "learning_rate": 5.234335992873176e-05,
      "loss": 0.5424,
      "step": 21060
    },
    {
      "epoch": 0.6696527843959466,
      "grad_norm": 0.8878015279769897,
      "learning_rate": 5.216255475298109e-05,
      "loss": 0.4734,
      "step": 21080
    },
    {
      "epoch": 0.6702881285936656,
      "grad_norm": 1.0038951635360718,
      "learning_rate": 5.198195214024848e-05,
      "loss": 0.4879,
      "step": 21100
    },
    {
      "epoch": 0.6709234727913848,
      "grad_norm": 0.9256641864776611,
      "learning_rate": 5.1801552855278126e-05,
      "loss": 0.527,
      "step": 21120
    },
    {
      "epoch": 0.6715588169891038,
      "grad_norm": 0.7668296098709106,
      "learning_rate": 5.162135766195337e-05,
      "loss": 0.5161,
      "step": 21140
    },
    {
      "epoch": 0.672194161186823,
      "grad_norm": 0.7756738066673279,
      "learning_rate": 5.144136732329323e-05,
      "loss": 0.5265,
      "step": 21160
    },
    {
      "epoch": 0.6728295053845421,
      "grad_norm": 0.9279829859733582,
      "learning_rate": 5.1261582601449285e-05,
      "loss": 0.4814,
      "step": 21180
    },
    {
      "epoch": 0.6734648495822612,
      "grad_norm": 1.1274375915527344,
      "learning_rate": 5.108200425770255e-05,
      "loss": 0.5061,
      "step": 21200
    },
    {
      "epoch": 0.6741001937799803,
      "grad_norm": 1.082535982131958,
      "learning_rate": 5.090263305246006e-05,
      "loss": 0.5081,
      "step": 21220
    },
    {
      "epoch": 0.6747355379776995,
      "grad_norm": 1.0355536937713623,
      "learning_rate": 5.0723469745251725e-05,
      "loss": 0.5044,
      "step": 21240
    },
    {
      "epoch": 0.6753708821754185,
      "grad_norm": 0.9309506416320801,
      "learning_rate": 5.054451509472728e-05,
      "loss": 0.5241,
      "step": 21260
    },
    {
      "epoch": 0.6760062263731377,
      "grad_norm": 0.818247377872467,
      "learning_rate": 5.0365769858652735e-05,
      "loss": 0.5034,
      "step": 21280
    },
    {
      "epoch": 0.6766415705708567,
      "grad_norm": 0.8921930193901062,
      "learning_rate": 5.0187234793907447e-05,
      "loss": 0.5089,
      "step": 21300
    },
    {
      "epoch": 0.6772769147685759,
      "grad_norm": 0.9915839433670044,
      "learning_rate": 5.000891065648087e-05,
      "loss": 0.5049,
      "step": 21320
    },
    {
      "epoch": 0.677912258966295,
      "grad_norm": 0.8783996105194092,
      "learning_rate": 4.983079820146922e-05,
      "loss": 0.5314,
      "step": 21340
    },
    {
      "epoch": 0.6785476031640141,
      "grad_norm": 0.8735405802726746,
      "learning_rate": 4.96528981830724e-05,
      "loss": 0.5036,
      "step": 21360
    },
    {
      "epoch": 0.6791829473617332,
      "grad_norm": 0.9674988389015198,
      "learning_rate": 4.947521135459072e-05,
      "loss": 0.5269,
      "step": 21380
    },
    {
      "epoch": 0.6798182915594523,
      "grad_norm": 0.9271227717399597,
      "learning_rate": 4.9297738468421896e-05,
      "loss": 0.5061,
      "step": 21400
    },
    {
      "epoch": 0.6804536357571714,
      "grad_norm": 0.7828012704849243,
      "learning_rate": 4.912048027605759e-05,
      "loss": 0.4978,
      "step": 21420
    },
    {
      "epoch": 0.6810889799548906,
      "grad_norm": 1.3417547941207886,
      "learning_rate": 4.8943437528080385e-05,
      "loss": 0.5326,
      "step": 21440
    },
    {
      "epoch": 0.6817243241526096,
      "grad_norm": 0.8963372707366943,
      "learning_rate": 4.876661097416066e-05,
      "loss": 0.4989,
      "step": 21460
    },
    {
      "epoch": 0.6823596683503288,
      "grad_norm": 0.893553614616394,
      "learning_rate": 4.859000136305329e-05,
      "loss": 0.4859,
      "step": 21480
    },
    {
      "epoch": 0.682995012548048,
      "grad_norm": 1.2325243949890137,
      "learning_rate": 4.8413609442594445e-05,
      "loss": 0.5037,
      "step": 21500
    },
    {
      "epoch": 0.683630356745767,
      "grad_norm": 0.8049502372741699,
      "learning_rate": 4.8237435959698706e-05,
      "loss": 0.509,
      "step": 21520
    },
    {
      "epoch": 0.6842657009434862,
      "grad_norm": 1.2289927005767822,
      "learning_rate": 4.8061481660355534e-05,
      "loss": 0.5128,
      "step": 21540
    },
    {
      "epoch": 0.6849010451412052,
      "grad_norm": 0.8123481869697571,
      "learning_rate": 4.7885747289626284e-05,
      "loss": 0.5031,
      "step": 21560
    },
    {
      "epoch": 0.6855363893389244,
      "grad_norm": 0.8852875232696533,
      "learning_rate": 4.771023359164116e-05,
      "loss": 0.4875,
      "step": 21580
    },
    {
      "epoch": 0.6861717335366435,
      "grad_norm": 0.8462742567062378,
      "learning_rate": 4.753494130959586e-05,
      "loss": 0.4787,
      "step": 21600
    },
    {
      "epoch": 0.6868070777343626,
      "grad_norm": 0.99876868724823,
      "learning_rate": 4.7359871185748485e-05,
      "loss": 0.5116,
      "step": 21620
    },
    {
      "epoch": 0.6874424219320817,
      "grad_norm": 0.9393181204795837,
      "learning_rate": 4.718502396141656e-05,
      "loss": 0.4878,
      "step": 21640
    },
    {
      "epoch": 0.6880777661298009,
      "grad_norm": 0.8426542282104492,
      "learning_rate": 4.701040037697364e-05,
      "loss": 0.4897,
      "step": 21660
    },
    {
      "epoch": 0.6887131103275199,
      "grad_norm": 0.938210666179657,
      "learning_rate": 4.683600117184631e-05,
      "loss": 0.492,
      "step": 21680
    },
    {
      "epoch": 0.6893484545252391,
      "grad_norm": 0.8325148820877075,
      "learning_rate": 4.666182708451114e-05,
      "loss": 0.4842,
      "step": 21700
    },
    {
      "epoch": 0.6899837987229581,
      "grad_norm": 0.8813055753707886,
      "learning_rate": 4.648787885249136e-05,
      "loss": 0.491,
      "step": 21720
    },
    {
      "epoch": 0.6906191429206773,
      "grad_norm": 1.0838825702667236,
      "learning_rate": 4.631415721235389e-05,
      "loss": 0.4732,
      "step": 21740
    },
    {
      "epoch": 0.6912544871183964,
      "grad_norm": 0.7203667163848877,
      "learning_rate": 4.614066289970609e-05,
      "loss": 0.4692,
      "step": 21760
    },
    {
      "epoch": 0.6918898313161155,
      "grad_norm": 1.181038737297058,
      "learning_rate": 4.596739664919287e-05,
      "loss": 0.5177,
      "step": 21780
    },
    {
      "epoch": 0.6925251755138346,
      "grad_norm": 0.9107904434204102,
      "learning_rate": 4.579435919449332e-05,
      "loss": 0.5186,
      "step": 21800
    },
    {
      "epoch": 0.6931605197115537,
      "grad_norm": 0.8281117081642151,
      "learning_rate": 4.5621551268317686e-05,
      "loss": 0.4848,
      "step": 21820
    },
    {
      "epoch": 0.6937958639092728,
      "grad_norm": 0.9180241227149963,
      "learning_rate": 4.545759700573378e-05,
      "loss": 0.4979,
      "step": 21840
    },
    {
      "epoch": 0.694431208106992,
      "grad_norm": 0.912675678730011,
      "learning_rate": 4.5285238763954426e-05,
      "loss": 0.5124,
      "step": 21860
    },
    {
      "epoch": 0.695066552304711,
      "grad_norm": 0.8163600564002991,
      "learning_rate": 4.5113112206520056e-05,
      "loss": 0.5205,
      "step": 21880
    },
    {
      "epoch": 0.6957018965024302,
      "grad_norm": 0.7308365702629089,
      "learning_rate": 4.494121806228392e-05,
      "loss": 0.5208,
      "step": 21900
    },
    {
      "epoch": 0.6963372407001494,
      "grad_norm": 0.7426006197929382,
      "learning_rate": 4.476955705911504e-05,
      "loss": 0.48,
      "step": 21920
    },
    {
      "epoch": 0.6969725848978684,
      "grad_norm": 0.9886866807937622,
      "learning_rate": 4.459812992389526e-05,
      "loss": 0.5483,
      "step": 21940
    },
    {
      "epoch": 0.6976079290955876,
      "grad_norm": 0.9653937816619873,
      "learning_rate": 4.44269373825162e-05,
      "loss": 0.4613,
      "step": 21960
    },
    {
      "epoch": 0.6982432732933066,
      "grad_norm": 0.8184491991996765,
      "learning_rate": 4.425598015987602e-05,
      "loss": 0.5212,
      "step": 21980
    },
    {
      "epoch": 0.6988786174910258,
      "grad_norm": 0.9365077614784241,
      "learning_rate": 4.408525897987645e-05,
      "loss": 0.4868,
      "step": 22000
    },
    {
      "epoch": 0.6988786174910258,
      "eval_loss": 0.45187339186668396,
      "eval_runtime": 44.7631,
      "eval_samples_per_second": 60.385,
      "eval_steps_per_second": 30.203,
      "step": 22000
    },
    {
      "epoch": 0.6995139616887449,
      "grad_norm": 0.9188706874847412,
      "learning_rate": 4.391477456541983e-05,
      "loss": 0.4991,
      "step": 22020
    },
    {
      "epoch": 0.700149305886464,
      "grad_norm": 0.8599129319190979,
      "learning_rate": 4.374452763840584e-05,
      "loss": 0.5184,
      "step": 22040
    },
    {
      "epoch": 0.7007846500841831,
      "grad_norm": 0.8643587827682495,
      "learning_rate": 4.357451891972854e-05,
      "loss": 0.4966,
      "step": 22060
    },
    {
      "epoch": 0.7014199942819023,
      "grad_norm": 0.9123074412345886,
      "learning_rate": 4.340474912927332e-05,
      "loss": 0.5068,
      "step": 22080
    },
    {
      "epoch": 0.7020553384796213,
      "grad_norm": 0.8422294855117798,
      "learning_rate": 4.323521898591394e-05,
      "loss": 0.4753,
      "step": 22100
    },
    {
      "epoch": 0.7026906826773405,
      "grad_norm": 0.8830937743186951,
      "learning_rate": 4.306592920750931e-05,
      "loss": 0.4837,
      "step": 22120
    },
    {
      "epoch": 0.7033260268750595,
      "grad_norm": 0.8540763854980469,
      "learning_rate": 4.289688051090054e-05,
      "loss": 0.4733,
      "step": 22140
    },
    {
      "epoch": 0.7039613710727787,
      "grad_norm": 0.8622573614120483,
      "learning_rate": 4.272807361190797e-05,
      "loss": 0.5003,
      "step": 22160
    },
    {
      "epoch": 0.7045967152704978,
      "grad_norm": 0.9827342629432678,
      "learning_rate": 4.2559509225328e-05,
      "loss": 0.5333,
      "step": 22180
    },
    {
      "epoch": 0.7052320594682169,
      "grad_norm": 0.8439646363258362,
      "learning_rate": 4.239118806493013e-05,
      "loss": 0.4778,
      "step": 22200
    },
    {
      "epoch": 0.705867403665936,
      "grad_norm": 0.9348493814468384,
      "learning_rate": 4.222311084345405e-05,
      "loss": 0.4806,
      "step": 22220
    },
    {
      "epoch": 0.7065027478636552,
      "grad_norm": 1.0671905279159546,
      "learning_rate": 4.2055278272606404e-05,
      "loss": 0.4978,
      "step": 22240
    },
    {
      "epoch": 0.7071380920613742,
      "grad_norm": 1.2363934516906738,
      "learning_rate": 4.188769106305787e-05,
      "loss": 0.5089,
      "step": 22260
    },
    {
      "epoch": 0.7077734362590934,
      "grad_norm": 0.9339464902877808,
      "learning_rate": 4.1720349924440295e-05,
      "loss": 0.4796,
      "step": 22280
    },
    {
      "epoch": 0.7084087804568124,
      "grad_norm": 0.873092770576477,
      "learning_rate": 4.155325556534345e-05,
      "loss": 0.4931,
      "step": 22300
    },
    {
      "epoch": 0.7090441246545316,
      "grad_norm": 0.7866622805595398,
      "learning_rate": 4.138640869331215e-05,
      "loss": 0.501,
      "step": 22320
    },
    {
      "epoch": 0.7096794688522507,
      "grad_norm": 1.0133357048034668,
      "learning_rate": 4.121981001484334e-05,
      "loss": 0.481,
      "step": 22340
    },
    {
      "epoch": 0.7103148130499698,
      "grad_norm": 0.9386391043663025,
      "learning_rate": 4.105346023538292e-05,
      "loss": 0.5303,
      "step": 22360
    },
    {
      "epoch": 0.710950157247689,
      "grad_norm": 0.7917353510856628,
      "learning_rate": 4.088736005932289e-05,
      "loss": 0.4993,
      "step": 22380
    },
    {
      "epoch": 0.711585501445408,
      "grad_norm": 0.9757121801376343,
      "learning_rate": 4.0721510189998266e-05,
      "loss": 0.5102,
      "step": 22400
    },
    {
      "epoch": 0.7122208456431272,
      "grad_norm": 1.2196959257125854,
      "learning_rate": 4.055591132968432e-05,
      "loss": 0.5045,
      "step": 22420
    },
    {
      "epoch": 0.7128561898408463,
      "grad_norm": 1.0833863019943237,
      "learning_rate": 4.039056417959328e-05,
      "loss": 0.5136,
      "step": 22440
    },
    {
      "epoch": 0.7134915340385654,
      "grad_norm": 0.7548487186431885,
      "learning_rate": 4.02254694398716e-05,
      "loss": 0.4864,
      "step": 22460
    },
    {
      "epoch": 0.7141268782362845,
      "grad_norm": 1.0435632467269897,
      "learning_rate": 4.006062780959697e-05,
      "loss": 0.4866,
      "step": 22480
    },
    {
      "epoch": 0.7147622224340037,
      "grad_norm": 0.7469571828842163,
      "learning_rate": 3.9896039986775256e-05,
      "loss": 0.4825,
      "step": 22500
    },
    {
      "epoch": 0.7153975666317227,
      "grad_norm": 0.8732174634933472,
      "learning_rate": 3.9731706668337585e-05,
      "loss": 0.4905,
      "step": 22520
    },
    {
      "epoch": 0.7160329108294419,
      "grad_norm": 0.8761599063873291,
      "learning_rate": 3.956762855013749e-05,
      "loss": 0.4831,
      "step": 22540
    },
    {
      "epoch": 0.7166682550271609,
      "grad_norm": 0.9746137261390686,
      "learning_rate": 3.940380632694781e-05,
      "loss": 0.5111,
      "step": 22560
    },
    {
      "epoch": 0.7173035992248801,
      "grad_norm": 0.9219092726707458,
      "learning_rate": 3.924024069245782e-05,
      "loss": 0.4908,
      "step": 22580
    },
    {
      "epoch": 0.7179389434225992,
      "grad_norm": 1.0305086374282837,
      "learning_rate": 3.907693233927038e-05,
      "loss": 0.5215,
      "step": 22600
    },
    {
      "epoch": 0.7185742876203183,
      "grad_norm": 0.7786363363265991,
      "learning_rate": 3.891388195889882e-05,
      "loss": 0.4792,
      "step": 22620
    },
    {
      "epoch": 0.7192096318180374,
      "grad_norm": 0.8930706977844238,
      "learning_rate": 3.875109024176413e-05,
      "loss": 0.4908,
      "step": 22640
    },
    {
      "epoch": 0.7198449760157566,
      "grad_norm": 1.0214048624038696,
      "learning_rate": 3.858855787719209e-05,
      "loss": 0.5102,
      "step": 22660
    },
    {
      "epoch": 0.7204803202134756,
      "grad_norm": 0.9279896020889282,
      "learning_rate": 3.842628555341018e-05,
      "loss": 0.4772,
      "step": 22680
    },
    {
      "epoch": 0.7211156644111948,
      "grad_norm": 1.6357091665267944,
      "learning_rate": 3.826427395754482e-05,
      "loss": 0.5041,
      "step": 22700
    },
    {
      "epoch": 0.7217510086089138,
      "grad_norm": 0.8421345949172974,
      "learning_rate": 3.8102523775618325e-05,
      "loss": 0.5082,
      "step": 22720
    },
    {
      "epoch": 0.722386352806633,
      "grad_norm": 0.9193027019500732,
      "learning_rate": 3.794103569254624e-05,
      "loss": 0.485,
      "step": 22740
    },
    {
      "epoch": 0.7230216970043521,
      "grad_norm": 0.8045080304145813,
      "learning_rate": 3.777981039213411e-05,
      "loss": 0.5182,
      "step": 22760
    },
    {
      "epoch": 0.7236570412020712,
      "grad_norm": 0.8535903692245483,
      "learning_rate": 3.7618848557074804e-05,
      "loss": 0.4796,
      "step": 22780
    },
    {
      "epoch": 0.7242923853997904,
      "grad_norm": 0.8225564360618591,
      "learning_rate": 3.745815086894565e-05,
      "loss": 0.4812,
      "step": 22800
    },
    {
      "epoch": 0.7249277295975094,
      "grad_norm": 0.8030312657356262,
      "learning_rate": 3.729771800820539e-05,
      "loss": 0.481,
      "step": 22820
    },
    {
      "epoch": 0.7255630737952286,
      "grad_norm": 0.992080569267273,
      "learning_rate": 3.713755065419133e-05,
      "loss": 0.4768,
      "step": 22840
    },
    {
      "epoch": 0.7261984179929477,
      "grad_norm": 0.9184660911560059,
      "learning_rate": 3.698563821122103e-05,
      "loss": 0.5044,
      "step": 22860
    },
    {
      "epoch": 0.7268337621906668,
      "grad_norm": 0.8250758647918701,
      "learning_rate": 3.6825990545007096e-05,
      "loss": 0.5095,
      "step": 22880
    },
    {
      "epoch": 0.7274691063883859,
      "grad_norm": 1.0519983768463135,
      "learning_rate": 3.666661038300353e-05,
      "loss": 0.4944,
      "step": 22900
    },
    {
      "epoch": 0.7281044505861051,
      "grad_norm": 0.789730966091156,
      "learning_rate": 3.650749840009022e-05,
      "loss": 0.4574,
      "step": 22920
    },
    {
      "epoch": 0.7287397947838241,
      "grad_norm": 0.8896093368530273,
      "learning_rate": 3.6356591030872534e-05,
      "loss": 0.5,
      "step": 22940
    },
    {
      "epoch": 0.7293751389815433,
      "grad_norm": 0.7810101509094238,
      "learning_rate": 3.6198003934005195e-05,
      "loss": 0.5053,
      "step": 22960
    },
    {
      "epoch": 0.7300104831792623,
      "grad_norm": 0.883144199848175,
      "learning_rate": 3.603968700049657e-05,
      "loss": 0.514,
      "step": 22980
    },
    {
      "epoch": 0.7306458273769815,
      "grad_norm": 0.7069016695022583,
      "learning_rate": 3.588164090072441e-05,
      "loss": 0.522,
      "step": 23000
    },
    {
      "epoch": 0.7306458273769815,
      "eval_loss": 0.4499790668487549,
      "eval_runtime": 45.0673,
      "eval_samples_per_second": 59.977,
      "eval_steps_per_second": 30.0,
      "step": 23000
    },
    {
      "epoch": 0.7312811715747006,
      "grad_norm": 1.0385907888412476,
      "learning_rate": 3.5723866303919554e-05,
      "loss": 0.489,
      "step": 23020
    },
    {
      "epoch": 0.7319165157724197,
      "grad_norm": 0.8796695470809937,
      "learning_rate": 3.556636387816317e-05,
      "loss": 0.4963,
      "step": 23040
    },
    {
      "epoch": 0.7325518599701388,
      "grad_norm": 0.9427993893623352,
      "learning_rate": 3.540913429038407e-05,
      "loss": 0.4601,
      "step": 23060
    },
    {
      "epoch": 0.733187204167858,
      "grad_norm": 0.8525741100311279,
      "learning_rate": 3.525217820635564e-05,
      "loss": 0.5034,
      "step": 23080
    },
    {
      "epoch": 0.733822548365577,
      "grad_norm": 0.8755898475646973,
      "learning_rate": 3.5095496290693155e-05,
      "loss": 0.509,
      "step": 23100
    },
    {
      "epoch": 0.7344578925632962,
      "grad_norm": 1.0328361988067627,
      "learning_rate": 3.4939089206851025e-05,
      "loss": 0.4994,
      "step": 23120
    },
    {
      "epoch": 0.7350932367610152,
      "grad_norm": 1.130226969718933,
      "learning_rate": 3.478295761711986e-05,
      "loss": 0.4848,
      "step": 23140
    },
    {
      "epoch": 0.7357285809587344,
      "grad_norm": 0.733567476272583,
      "learning_rate": 3.4627102182623696e-05,
      "loss": 0.5123,
      "step": 23160
    },
    {
      "epoch": 0.7363639251564535,
      "grad_norm": 1.1062750816345215,
      "learning_rate": 3.447152356331721e-05,
      "loss": 0.4767,
      "step": 23180
    },
    {
      "epoch": 0.7369992693541726,
      "grad_norm": 0.9558404684066772,
      "learning_rate": 3.431622241798305e-05,
      "loss": 0.4832,
      "step": 23200
    },
    {
      "epoch": 0.7376346135518917,
      "grad_norm": 0.8974496722221375,
      "learning_rate": 3.416119940422877e-05,
      "loss": 0.4818,
      "step": 23220
    },
    {
      "epoch": 0.7382699577496109,
      "grad_norm": 1.2721449136734009,
      "learning_rate": 3.400645517848427e-05,
      "loss": 0.5102,
      "step": 23240
    },
    {
      "epoch": 0.73890530194733,
      "grad_norm": 1.0408607721328735,
      "learning_rate": 3.385199039599902e-05,
      "loss": 0.4784,
      "step": 23260
    },
    {
      "epoch": 0.7395406461450491,
      "grad_norm": 0.9826887845993042,
      "learning_rate": 3.369780571083909e-05,
      "loss": 0.5039,
      "step": 23280
    },
    {
      "epoch": 0.7401759903427682,
      "grad_norm": 0.8110315799713135,
      "learning_rate": 3.354390177588454e-05,
      "loss": 0.5034,
      "step": 23300
    },
    {
      "epoch": 0.7408113345404873,
      "grad_norm": 0.8513306975364685,
      "learning_rate": 3.339027924282673e-05,
      "loss": 0.509,
      "step": 23320
    },
    {
      "epoch": 0.7414466787382065,
      "grad_norm": 0.8255580067634583,
      "learning_rate": 3.323693876216529e-05,
      "loss": 0.4678,
      "step": 23340
    },
    {
      "epoch": 0.7420820229359255,
      "grad_norm": 1.1336640119552612,
      "learning_rate": 3.30838809832056e-05,
      "loss": 0.4848,
      "step": 23360
    },
    {
      "epoch": 0.7427173671336447,
      "grad_norm": 0.8720375895500183,
      "learning_rate": 3.2931106554056005e-05,
      "loss": 0.4929,
      "step": 23380
    },
    {
      "epoch": 0.7433527113313637,
      "grad_norm": 1.0169090032577515,
      "learning_rate": 3.277861612162498e-05,
      "loss": 0.5066,
      "step": 23400
    },
    {
      "epoch": 0.7439880555290829,
      "grad_norm": 1.2800534963607788,
      "learning_rate": 3.262641033161843e-05,
      "loss": 0.4964,
      "step": 23420
    },
    {
      "epoch": 0.744623399726802,
      "grad_norm": 0.819925844669342,
      "learning_rate": 3.2474489828537046e-05,
      "loss": 0.509,
      "step": 23440
    },
    {
      "epoch": 0.7452587439245211,
      "grad_norm": 0.8024299144744873,
      "learning_rate": 3.232285525567343e-05,
      "loss": 0.4922,
      "step": 23460
    },
    {
      "epoch": 0.7458940881222402,
      "grad_norm": 1.1049789190292358,
      "learning_rate": 3.217150725510946e-05,
      "loss": 0.4907,
      "step": 23480
    },
    {
      "epoch": 0.7465294323199594,
      "grad_norm": 1.0818272829055786,
      "learning_rate": 3.2020446467713516e-05,
      "loss": 0.4806,
      "step": 23500
    },
    {
      "epoch": 0.7471647765176784,
      "grad_norm": 0.6681995391845703,
      "learning_rate": 3.18696735331379e-05,
      "loss": 0.4504,
      "step": 23520
    },
    {
      "epoch": 0.7478001207153976,
      "grad_norm": 0.8827902674674988,
      "learning_rate": 3.171918908981595e-05,
      "loss": 0.5081,
      "step": 23540
    },
    {
      "epoch": 0.7484354649131166,
      "grad_norm": 1.0249037742614746,
      "learning_rate": 3.156899377495938e-05,
      "loss": 0.5297,
      "step": 23560
    },
    {
      "epoch": 0.7490708091108358,
      "grad_norm": 1.0797147750854492,
      "learning_rate": 3.141908822455574e-05,
      "loss": 0.4701,
      "step": 23580
    },
    {
      "epoch": 0.749706153308555,
      "grad_norm": 0.724281907081604,
      "learning_rate": 3.126947307336551e-05,
      "loss": 0.4608,
      "step": 23600
    },
    {
      "epoch": 0.750341497506274,
      "grad_norm": 0.7410632967948914,
      "learning_rate": 3.1120148954919485e-05,
      "loss": 0.4747,
      "step": 23620
    },
    {
      "epoch": 0.7509768417039931,
      "grad_norm": 1.0309559106826782,
      "learning_rate": 3.09711165015162e-05,
      "loss": 0.534,
      "step": 23640
    },
    {
      "epoch": 0.7516121859017123,
      "grad_norm": 0.9060602784156799,
      "learning_rate": 3.0822376344219105e-05,
      "loss": 0.4709,
      "step": 23660
    },
    {
      "epoch": 0.7522475300994313,
      "grad_norm": 0.9018211364746094,
      "learning_rate": 3.067392911285395e-05,
      "loss": 0.5084,
      "step": 23680
    },
    {
      "epoch": 0.7528828742971505,
      "grad_norm": 1.1375420093536377,
      "learning_rate": 3.0525775436006107e-05,
      "loss": 0.5023,
      "step": 23700
    },
    {
      "epoch": 0.7535182184948696,
      "grad_norm": 0.8034165501594543,
      "learning_rate": 3.0377915941017955e-05,
      "loss": 0.4947,
      "step": 23720
    },
    {
      "epoch": 0.7541535626925887,
      "grad_norm": 1.0958040952682495,
      "learning_rate": 3.0230351253986143e-05,
      "loss": 0.5009,
      "step": 23740
    },
    {
      "epoch": 0.7547889068903079,
      "grad_norm": 0.8740959763526917,
      "learning_rate": 3.0083081999759067e-05,
      "loss": 0.4942,
      "step": 23760
    },
    {
      "epoch": 0.7554242510880269,
      "grad_norm": 0.8798695206642151,
      "learning_rate": 2.993610880193406e-05,
      "loss": 0.4676,
      "step": 23780
    },
    {
      "epoch": 0.7560595952857461,
      "grad_norm": 0.9538172483444214,
      "learning_rate": 2.9789432282854822e-05,
      "loss": 0.4441,
      "step": 23800
    },
    {
      "epoch": 0.7566949394834651,
      "grad_norm": 0.9560829401016235,
      "learning_rate": 2.9643053063608917e-05,
      "loss": 0.4995,
      "step": 23820
    },
    {
      "epoch": 0.7573302836811843,
      "grad_norm": 1.0306763648986816,
      "learning_rate": 2.9496971764024884e-05,
      "loss": 0.5042,
      "step": 23840
    },
    {
      "epoch": 0.7579656278789034,
      "grad_norm": 0.9823128581047058,
      "learning_rate": 2.9351189002669788e-05,
      "loss": 0.5274,
      "step": 23860
    },
    {
      "epoch": 0.7586009720766225,
      "grad_norm": 0.8448672890663147,
      "learning_rate": 2.920570539684665e-05,
      "loss": 0.4713,
      "step": 23880
    },
    {
      "epoch": 0.7592363162743416,
      "grad_norm": 0.8830504417419434,
      "learning_rate": 2.9060521562591624e-05,
      "loss": 0.5069,
      "step": 23900
    },
    {
      "epoch": 0.7598716604720608,
      "grad_norm": 0.9051734805107117,
      "learning_rate": 2.891563811467154e-05,
      "loss": 0.48,
      "step": 23920
    },
    {
      "epoch": 0.7605070046697798,
      "grad_norm": 0.8309674859046936,
      "learning_rate": 2.877105566658136e-05,
      "loss": 0.5141,
      "step": 23940
    },
    {
      "epoch": 0.761142348867499,
      "grad_norm": 0.8684896230697632,
      "learning_rate": 2.863398169962057e-05,
      "loss": 0.4518,
      "step": 23960
    },
    {
      "epoch": 0.761777693065218,
      "grad_norm": 0.959536075592041,
      "learning_rate": 2.8489987960934184e-05,
      "loss": 0.483,
      "step": 23980
    },
    {
      "epoch": 0.7624130372629372,
      "grad_norm": 1.3519070148468018,
      "learning_rate": 2.8353474370325594e-05,
      "loss": 0.5062,
      "step": 24000
    },
    {
      "epoch": 0.7624130372629372,
      "eval_loss": 0.4479082524776459,
      "eval_runtime": 44.6533,
      "eval_samples_per_second": 60.533,
      "eval_steps_per_second": 30.278,
      "step": 24000
    },
    {
      "epoch": 0.7630483814606563,
      "grad_norm": 0.8832095861434937,
      "learning_rate": 2.8210071659529526e-05,
      "loss": 0.5204,
      "step": 24020
    },
    {
      "epoch": 0.7636837256583754,
      "grad_norm": 0.793205738067627,
      "learning_rate": 2.8066972936216017e-05,
      "loss": 0.5037,
      "step": 24040
    },
    {
      "epoch": 0.7643190698560945,
      "grad_norm": 0.8483644127845764,
      "learning_rate": 2.79241788063227e-05,
      "loss": 0.4812,
      "step": 24060
    },
    {
      "epoch": 0.7649544140538137,
      "grad_norm": 1.50220787525177,
      "learning_rate": 2.7781689874497406e-05,
      "loss": 0.501,
      "step": 24080
    },
    {
      "epoch": 0.7655897582515327,
      "grad_norm": 0.8091638684272766,
      "learning_rate": 2.7639506744095766e-05,
      "loss": 0.4932,
      "step": 24100
    },
    {
      "epoch": 0.7662251024492519,
      "grad_norm": 0.9171321392059326,
      "learning_rate": 2.74976300171784e-05,
      "loss": 0.5,
      "step": 24120
    },
    {
      "epoch": 0.766860446646971,
      "grad_norm": 0.9392116069793701,
      "learning_rate": 2.7356060294508502e-05,
      "loss": 0.5075,
      "step": 24140
    },
    {
      "epoch": 0.7674957908446901,
      "grad_norm": 0.9384047389030457,
      "learning_rate": 2.7214798175549395e-05,
      "loss": 0.4893,
      "step": 24160
    },
    {
      "epoch": 0.7681311350424093,
      "grad_norm": 0.7760775685310364,
      "learning_rate": 2.707384425846178e-05,
      "loss": 0.5267,
      "step": 24180
    },
    {
      "epoch": 0.7687664792401283,
      "grad_norm": 0.8666489720344543,
      "learning_rate": 2.6933199140101285e-05,
      "loss": 0.5201,
      "step": 24200
    },
    {
      "epoch": 0.7694018234378475,
      "grad_norm": 0.9711599946022034,
      "learning_rate": 2.679286341601609e-05,
      "loss": 0.4923,
      "step": 24220
    },
    {
      "epoch": 0.7700371676355666,
      "grad_norm": 0.9399335980415344,
      "learning_rate": 2.6652837680444153e-05,
      "loss": 0.5281,
      "step": 24240
    },
    {
      "epoch": 0.7706725118332857,
      "grad_norm": 0.8116670250892639,
      "learning_rate": 2.651312252631083e-05,
      "loss": 0.5111,
      "step": 24260
    },
    {
      "epoch": 0.7713078560310048,
      "grad_norm": 0.873943030834198,
      "learning_rate": 2.6373718545226445e-05,
      "loss": 0.471,
      "step": 24280
    },
    {
      "epoch": 0.7719432002287239,
      "grad_norm": 0.9560205340385437,
      "learning_rate": 2.623462632748359e-05,
      "loss": 0.5101,
      "step": 24300
    },
    {
      "epoch": 0.772578544426443,
      "grad_norm": 1.011898159980774,
      "learning_rate": 2.6095846462054763e-05,
      "loss": 0.4906,
      "step": 24320
    },
    {
      "epoch": 0.7732138886241622,
      "grad_norm": 1.0334892272949219,
      "learning_rate": 2.595737953658982e-05,
      "loss": 0.4905,
      "step": 24340
    },
    {
      "epoch": 0.7738492328218812,
      "grad_norm": 0.6994766592979431,
      "learning_rate": 2.581922613741352e-05,
      "loss": 0.4794,
      "step": 24360
    },
    {
      "epoch": 0.7744845770196004,
      "grad_norm": 0.9781257510185242,
      "learning_rate": 2.5681386849523003e-05,
      "loss": 0.4871,
      "step": 24380
    },
    {
      "epoch": 0.7751199212173194,
      "grad_norm": 1.0443729162216187,
      "learning_rate": 2.5543862256585393e-05,
      "loss": 0.5133,
      "step": 24400
    },
    {
      "epoch": 0.7757552654150386,
      "grad_norm": 0.8841618299484253,
      "learning_rate": 2.5406652940935217e-05,
      "loss": 0.4865,
      "step": 24420
    },
    {
      "epoch": 0.7763906096127577,
      "grad_norm": 0.8439558148384094,
      "learning_rate": 2.5269759483571954e-05,
      "loss": 0.4908,
      "step": 24440
    },
    {
      "epoch": 0.7770259538104768,
      "grad_norm": 0.9146759510040283,
      "learning_rate": 2.5133182464157734e-05,
      "loss": 0.4934,
      "step": 24460
    },
    {
      "epoch": 0.777661298008196,
      "grad_norm": 0.7785593867301941,
      "learning_rate": 2.499692246101466e-05,
      "loss": 0.4857,
      "step": 24480
    },
    {
      "epoch": 0.7782966422059151,
      "grad_norm": 0.9240188002586365,
      "learning_rate": 2.4860980051122474e-05,
      "loss": 0.4958,
      "step": 24500
    },
    {
      "epoch": 0.7789319864036341,
      "grad_norm": 1.0593191385269165,
      "learning_rate": 2.4725355810116103e-05,
      "loss": 0.5077,
      "step": 24520
    },
    {
      "epoch": 0.7795673306013533,
      "grad_norm": 0.8705240488052368,
      "learning_rate": 2.4590050312283263e-05,
      "loss": 0.4792,
      "step": 24540
    },
    {
      "epoch": 0.7802026747990723,
      "grad_norm": 0.8610863089561462,
      "learning_rate": 2.4455064130561944e-05,
      "loss": 0.4949,
      "step": 24560
    },
    {
      "epoch": 0.7808380189967915,
      "grad_norm": 1.152521014213562,
      "learning_rate": 2.432039783653799e-05,
      "loss": 0.5076,
      "step": 24580
    },
    {
      "epoch": 0.7814733631945107,
      "grad_norm": 0.8608033657073975,
      "learning_rate": 2.4186052000442806e-05,
      "loss": 0.4759,
      "step": 24600
    },
    {
      "epoch": 0.7821087073922297,
      "grad_norm": 1.1664726734161377,
      "learning_rate": 2.4052027191150762e-05,
      "loss": 0.4941,
      "step": 24620
    },
    {
      "epoch": 0.7827440515899489,
      "grad_norm": 0.8805221915245056,
      "learning_rate": 2.3918323976176883e-05,
      "loss": 0.4797,
      "step": 24640
    },
    {
      "epoch": 0.783379395787668,
      "grad_norm": 0.7699743509292603,
      "learning_rate": 2.3784942921674512e-05,
      "loss": 0.4903,
      "step": 24660
    },
    {
      "epoch": 0.7840147399853871,
      "grad_norm": 0.9498074650764465,
      "learning_rate": 2.365188459243274e-05,
      "loss": 0.4679,
      "step": 24680
    },
    {
      "epoch": 0.7846500841831062,
      "grad_norm": 0.815447986125946,
      "learning_rate": 2.351914955187412e-05,
      "loss": 0.5114,
      "step": 24700
    },
    {
      "epoch": 0.7852854283808253,
      "grad_norm": 0.984866738319397,
      "learning_rate": 2.3386738362052353e-05,
      "loss": 0.4725,
      "step": 24720
    },
    {
      "epoch": 0.7859207725785444,
      "grad_norm": 1.0802818536758423,
      "learning_rate": 2.3254651583649735e-05,
      "loss": 0.4684,
      "step": 24740
    },
    {
      "epoch": 0.7865561167762636,
      "grad_norm": 0.8058573007583618,
      "learning_rate": 2.3122889775974887e-05,
      "loss": 0.4847,
      "step": 24760
    },
    {
      "epoch": 0.7871914609739826,
      "grad_norm": 0.8836669921875,
      "learning_rate": 2.2991453496960447e-05,
      "loss": 0.4859,
      "step": 24780
    },
    {
      "epoch": 0.7878268051717018,
      "grad_norm": 0.7214009165763855,
      "learning_rate": 2.2860343303160535e-05,
      "loss": 0.4816,
      "step": 24800
    },
    {
      "epoch": 0.7884621493694208,
      "grad_norm": 0.8268193006515503,
      "learning_rate": 2.2729559749748575e-05,
      "loss": 0.4674,
      "step": 24820
    },
    {
      "epoch": 0.78909749356714,
      "grad_norm": 0.7158612608909607,
      "learning_rate": 2.2599103390514766e-05,
      "loss": 0.465,
      "step": 24840
    },
    {
      "epoch": 0.7897328377648591,
      "grad_norm": 0.8904339671134949,
      "learning_rate": 2.246897477786396e-05,
      "loss": 0.5024,
      "step": 24860
    },
    {
      "epoch": 0.7903681819625782,
      "grad_norm": 0.8315703272819519,
      "learning_rate": 2.2339174462813127e-05,
      "loss": 0.4609,
      "step": 24880
    },
    {
      "epoch": 0.7910035261602973,
      "grad_norm": 0.8962224721908569,
      "learning_rate": 2.2209702994989045e-05,
      "loss": 0.4906,
      "step": 24900
    },
    {
      "epoch": 0.7916388703580165,
      "grad_norm": 0.9301977753639221,
      "learning_rate": 2.208056092262616e-05,
      "loss": 0.5216,
      "step": 24920
    },
    {
      "epoch": 0.7922742145557355,
      "grad_norm": 0.8634437918663025,
      "learning_rate": 2.1951748792563985e-05,
      "loss": 0.5031,
      "step": 24940
    },
    {
      "epoch": 0.7929095587534547,
      "grad_norm": 0.8985020518302917,
      "learning_rate": 2.1823267150244964e-05,
      "loss": 0.4709,
      "step": 24960
    },
    {
      "epoch": 0.7935449029511737,
      "grad_norm": 1.1470792293548584,
      "learning_rate": 2.16951165397122e-05,
      "loss": 0.5224,
      "step": 24980
    },
    {
      "epoch": 0.7941802471488929,
      "grad_norm": 0.919326663017273,
      "learning_rate": 2.1567297503606987e-05,
      "loss": 0.5004,
      "step": 25000
    },
    {
      "epoch": 0.7941802471488929,
      "eval_loss": 0.44602036476135254,
      "eval_runtime": 44.8391,
      "eval_samples_per_second": 60.282,
      "eval_steps_per_second": 30.152,
      "step": 25000
    },
    {
      "epoch": 0.7948155913466121,
      "grad_norm": 1.1010879278182983,
      "learning_rate": 2.1439810583166587e-05,
      "loss": 0.5077,
      "step": 25020
    },
    {
      "epoch": 0.7954509355443311,
      "grad_norm": 0.8573036789894104,
      "learning_rate": 2.131900612258364e-05,
      "loss": 0.4973,
      "step": 25040
    },
    {
      "epoch": 0.7960862797420503,
      "grad_norm": 0.8931069374084473,
      "learning_rate": 2.1198502345256165e-05,
      "loss": 0.4972,
      "step": 25060
    },
    {
      "epoch": 0.7967216239397694,
      "grad_norm": 1.239161491394043,
      "learning_rate": 2.107198160794136e-05,
      "loss": 0.4981,
      "step": 25080
    },
    {
      "epoch": 0.7973569681374885,
      "grad_norm": 0.9950107336044312,
      "learning_rate": 2.0945795083658447e-05,
      "loss": 0.506,
      "step": 25100
    },
    {
      "epoch": 0.7979923123352076,
      "grad_norm": 0.7783673405647278,
      "learning_rate": 2.0819943306732082e-05,
      "loss": 0.4763,
      "step": 25120
    },
    {
      "epoch": 0.7986276565329267,
      "grad_norm": 0.912331223487854,
      "learning_rate": 2.0694426810069345e-05,
      "loss": 0.4622,
      "step": 25140
    },
    {
      "epoch": 0.7992630007306458,
      "grad_norm": 0.8284201622009277,
      "learning_rate": 2.0569246125157658e-05,
      "loss": 0.513,
      "step": 25160
    },
    {
      "epoch": 0.799898344928365,
      "grad_norm": 1.1468638181686401,
      "learning_rate": 2.0444401782062518e-05,
      "loss": 0.4719,
      "step": 25180
    },
    {
      "epoch": 0.800533689126084,
      "grad_norm": 1.0985773801803589,
      "learning_rate": 2.0319894309425146e-05,
      "loss": 0.4871,
      "step": 25200
    },
    {
      "epoch": 0.8011690333238032,
      "grad_norm": 1.1010768413543701,
      "learning_rate": 2.0195724234460322e-05,
      "loss": 0.5459,
      "step": 25220
    },
    {
      "epoch": 0.8018043775215223,
      "grad_norm": 0.9938257336616516,
      "learning_rate": 2.0071892082954248e-05,
      "loss": 0.5127,
      "step": 25240
    },
    {
      "epoch": 0.8024397217192414,
      "grad_norm": 1.1338539123535156,
      "learning_rate": 1.9954565018232684e-05,
      "loss": 0.4838,
      "step": 25260
    },
    {
      "epoch": 0.8030750659169605,
      "grad_norm": 0.7955858111381531,
      "learning_rate": 1.9831393324342518e-05,
      "loss": 0.4865,
      "step": 25280
    },
    {
      "epoch": 0.8037104101146796,
      "grad_norm": 1.0443702936172485,
      "learning_rate": 1.9708561096634902e-05,
      "loss": 0.4749,
      "step": 25300
    },
    {
      "epoch": 0.8043457543123987,
      "grad_norm": 1.0816038846969604,
      "learning_rate": 1.958606885523103e-05,
      "loss": 0.5142,
      "step": 25320
    },
    {
      "epoch": 0.8049810985101179,
      "grad_norm": 1.2127019166946411,
      "learning_rate": 1.946391711881239e-05,
      "loss": 0.4831,
      "step": 25340
    },
    {
      "epoch": 0.8056164427078369,
      "grad_norm": 0.8780348300933838,
      "learning_rate": 1.9342106404618632e-05,
      "loss": 0.5113,
      "step": 25360
    },
    {
      "epoch": 0.8062517869055561,
      "grad_norm": 0.7795581221580505,
      "learning_rate": 1.9220637228445438e-05,
      "loss": 0.4721,
      "step": 25380
    },
    {
      "epoch": 0.8068871311032751,
      "grad_norm": 0.9518604874610901,
      "learning_rate": 1.9099510104642216e-05,
      "loss": 0.4754,
      "step": 25400
    },
    {
      "epoch": 0.8075224753009943,
      "grad_norm": 1.0051589012145996,
      "learning_rate": 1.8978725546110022e-05,
      "loss": 0.4936,
      "step": 25420
    },
    {
      "epoch": 0.8081578194987135,
      "grad_norm": 0.8047780394554138,
      "learning_rate": 1.8858284064299326e-05,
      "loss": 0.4901,
      "step": 25440
    },
    {
      "epoch": 0.8087931636964325,
      "grad_norm": 1.1246352195739746,
      "learning_rate": 1.8738186169207917e-05,
      "loss": 0.5117,
      "step": 25460
    },
    {
      "epoch": 0.8094285078941517,
      "grad_norm": 0.8150719404220581,
      "learning_rate": 1.861843236937867e-05,
      "loss": 0.4685,
      "step": 25480
    },
    {
      "epoch": 0.8100638520918708,
      "grad_norm": 2.195882558822632,
      "learning_rate": 1.8499023171897388e-05,
      "loss": 0.471,
      "step": 25500
    },
    {
      "epoch": 0.8106991962895899,
      "grad_norm": 0.8962704539299011,
      "learning_rate": 1.8379959082390798e-05,
      "loss": 0.481,
      "step": 25520
    },
    {
      "epoch": 0.811334540487309,
      "grad_norm": 0.8531712889671326,
      "learning_rate": 1.8261240605024165e-05,
      "loss": 0.4881,
      "step": 25540
    },
    {
      "epoch": 0.8119698846850281,
      "grad_norm": 0.9354826807975769,
      "learning_rate": 1.8142868242499368e-05,
      "loss": 0.4761,
      "step": 25560
    },
    {
      "epoch": 0.8126052288827472,
      "grad_norm": 1.0048118829727173,
      "learning_rate": 1.8024842496052708e-05,
      "loss": 0.4968,
      "step": 25580
    },
    {
      "epoch": 0.8132405730804664,
      "grad_norm": 0.8254916071891785,
      "learning_rate": 1.790716386545275e-05,
      "loss": 0.5076,
      "step": 25600
    },
    {
      "epoch": 0.8138759172781854,
      "grad_norm": 0.9708372950553894,
      "learning_rate": 1.778983284899819e-05,
      "loss": 0.5197,
      "step": 25620
    },
    {
      "epoch": 0.8145112614759046,
      "grad_norm": 0.9034101366996765,
      "learning_rate": 1.767284994351588e-05,
      "loss": 0.4954,
      "step": 25640
    },
    {
      "epoch": 0.8151466056736237,
      "grad_norm": 1.3567668199539185,
      "learning_rate": 1.7556215644358564e-05,
      "loss": 0.5133,
      "step": 25660
    },
    {
      "epoch": 0.8157819498713428,
      "grad_norm": 0.9000421166419983,
      "learning_rate": 1.743993044540282e-05,
      "loss": 0.524,
      "step": 25680
    },
    {
      "epoch": 0.8164172940690619,
      "grad_norm": 0.7230278849601746,
      "learning_rate": 1.7323994839047086e-05,
      "loss": 0.4831,
      "step": 25700
    },
    {
      "epoch": 0.817052638266781,
      "grad_norm": 0.8648797273635864,
      "learning_rate": 1.7208409316209407e-05,
      "loss": 0.4932,
      "step": 25720
    },
    {
      "epoch": 0.8176879824645001,
      "grad_norm": 0.9017996788024902,
      "learning_rate": 1.709317436632547e-05,
      "loss": 0.4787,
      "step": 25740
    },
    {
      "epoch": 0.8183233266622193,
      "grad_norm": 0.9122520685195923,
      "learning_rate": 1.697829047734646e-05,
      "loss": 0.4721,
      "step": 25760
    },
    {
      "epoch": 0.8189586708599383,
      "grad_norm": 0.9448441863059998,
      "learning_rate": 1.6863758135737085e-05,
      "loss": 0.4772,
      "step": 25780
    },
    {
      "epoch": 0.8195940150576575,
      "grad_norm": 1.052437424659729,
      "learning_rate": 1.6749577826473405e-05,
      "loss": 0.5252,
      "step": 25800
    },
    {
      "epoch": 0.8202293592553767,
      "grad_norm": 0.9826536774635315,
      "learning_rate": 1.6635750033040842e-05,
      "loss": 0.5187,
      "step": 25820
    },
    {
      "epoch": 0.8208647034530957,
      "grad_norm": 0.8498765826225281,
      "learning_rate": 1.6522275237432193e-05,
      "loss": 0.4792,
      "step": 25840
    },
    {
      "epoch": 0.8215000476508149,
      "grad_norm": 0.9139013886451721,
      "learning_rate": 1.6409153920145416e-05,
      "loss": 0.5006,
      "step": 25860
    },
    {
      "epoch": 0.8221353918485339,
      "grad_norm": 0.9082590937614441,
      "learning_rate": 1.6296386560181744e-05,
      "loss": 0.4801,
      "step": 25880
    },
    {
      "epoch": 0.8227707360462531,
      "grad_norm": 0.8360690474510193,
      "learning_rate": 1.618397363504366e-05,
      "loss": 0.491,
      "step": 25900
    },
    {
      "epoch": 0.8234060802439722,
      "grad_norm": 0.8585413098335266,
      "learning_rate": 1.6071915620732746e-05,
      "loss": 0.4952,
      "step": 25920
    },
    {
      "epoch": 0.8240414244416913,
      "grad_norm": 0.9051182866096497,
      "learning_rate": 1.5960212991747804e-05,
      "loss": 0.5021,
      "step": 25940
    },
    {
      "epoch": 0.8246767686394104,
      "grad_norm": 1.1850552558898926,
      "learning_rate": 1.584886622108276e-05,
      "loss": 0.5194,
      "step": 25960
    },
    {
      "epoch": 0.8253121128371295,
      "grad_norm": 0.8449670672416687,
      "learning_rate": 1.57378757802247e-05,
      "loss": 0.4988,
      "step": 25980
    },
    {
      "epoch": 0.8259474570348486,
      "grad_norm": 0.9663527607917786,
      "learning_rate": 1.5627242139151867e-05,
      "loss": 0.4782,
      "step": 26000
    },
    {
      "epoch": 0.8259474570348486,
      "eval_loss": 0.44560423493385315,
      "eval_runtime": 45.0247,
      "eval_samples_per_second": 60.034,
      "eval_steps_per_second": 30.028,
      "step": 26000
    },
    {
      "epoch": 0.8265828012325678,
      "grad_norm": 1.0954176187515259,
      "learning_rate": 1.5516965766331715e-05,
      "loss": 0.4992,
      "step": 26020
    },
    {
      "epoch": 0.8272181454302868,
      "grad_norm": 0.9752370119094849,
      "learning_rate": 1.540704712871881e-05,
      "loss": 0.5109,
      "step": 26040
    },
    {
      "epoch": 0.827853489628006,
      "grad_norm": 0.7089188098907471,
      "learning_rate": 1.5297486691752928e-05,
      "loss": 0.4669,
      "step": 26060
    },
    {
      "epoch": 0.8284888338257251,
      "grad_norm": 0.8641648292541504,
      "learning_rate": 1.5188284919357155e-05,
      "loss": 0.4905,
      "step": 26080
    },
    {
      "epoch": 0.8291241780234442,
      "grad_norm": 0.8167259097099304,
      "learning_rate": 1.5079442273935773e-05,
      "loss": 0.4776,
      "step": 26100
    },
    {
      "epoch": 0.8297595222211633,
      "grad_norm": 0.9287614226341248,
      "learning_rate": 1.4970959216372372e-05,
      "loss": 0.4803,
      "step": 26120
    },
    {
      "epoch": 0.8303948664188824,
      "grad_norm": 0.8652564883232117,
      "learning_rate": 1.4862836206027975e-05,
      "loss": 0.4623,
      "step": 26140
    },
    {
      "epoch": 0.8310302106166015,
      "grad_norm": 0.9141151309013367,
      "learning_rate": 1.4755073700738953e-05,
      "loss": 0.507,
      "step": 26160
    },
    {
      "epoch": 0.8316655548143207,
      "grad_norm": 0.9454159736633301,
      "learning_rate": 1.464767215681515e-05,
      "loss": 0.5218,
      "step": 26180
    },
    {
      "epoch": 0.8323008990120397,
      "grad_norm": 0.7766212821006775,
      "learning_rate": 1.4540632029038026e-05,
      "loss": 0.5294,
      "step": 26200
    },
    {
      "epoch": 0.8329362432097589,
      "grad_norm": 0.8662501573562622,
      "learning_rate": 1.443395377065858e-05,
      "loss": 0.4931,
      "step": 26220
    },
    {
      "epoch": 0.833571587407478,
      "grad_norm": 1.0195443630218506,
      "learning_rate": 1.4327637833395525e-05,
      "loss": 0.5165,
      "step": 26240
    },
    {
      "epoch": 0.8342069316051971,
      "grad_norm": 0.9022318124771118,
      "learning_rate": 1.422168466743341e-05,
      "loss": 0.4732,
      "step": 26260
    },
    {
      "epoch": 0.8348422758029163,
      "grad_norm": 0.9162563681602478,
      "learning_rate": 1.4116094721420625e-05,
      "loss": 0.496,
      "step": 26280
    },
    {
      "epoch": 0.8354776200006353,
      "grad_norm": 1.129158854484558,
      "learning_rate": 1.401086844246755e-05,
      "loss": 0.4764,
      "step": 26300
    },
    {
      "epoch": 0.8361129641983545,
      "grad_norm": 0.8695496320724487,
      "learning_rate": 1.3906006276144601e-05,
      "loss": 0.4852,
      "step": 26320
    },
    {
      "epoch": 0.8367483083960736,
      "grad_norm": 1.7362381219863892,
      "learning_rate": 1.3801508666480512e-05,
      "loss": 0.4642,
      "step": 26340
    },
    {
      "epoch": 0.8373836525937927,
      "grad_norm": 0.7645226716995239,
      "learning_rate": 1.369737605596022e-05,
      "loss": 0.503,
      "step": 26360
    },
    {
      "epoch": 0.8380189967915118,
      "grad_norm": 0.8403562903404236,
      "learning_rate": 1.3593608885523158e-05,
      "loss": 0.4766,
      "step": 26380
    },
    {
      "epoch": 0.8386543409892309,
      "grad_norm": 0.7841979265213013,
      "learning_rate": 1.3490207594561366e-05,
      "loss": 0.4917,
      "step": 26400
    },
    {
      "epoch": 0.83928968518695,
      "grad_norm": 0.8631531000137329,
      "learning_rate": 1.3392315662821897e-05,
      "loss": 0.4972,
      "step": 26420
    },
    {
      "epoch": 0.8399250293846692,
      "grad_norm": 1.0436699390411377,
      "learning_rate": 1.3289629094769217e-05,
      "loss": 0.4847,
      "step": 26440
    },
    {
      "epoch": 0.8405603735823882,
      "grad_norm": 0.9521028399467468,
      "learning_rate": 1.318730969336468e-05,
      "loss": 0.4972,
      "step": 26460
    },
    {
      "epoch": 0.8411957177801074,
      "grad_norm": 0.9861098527908325,
      "learning_rate": 1.3085357891869909e-05,
      "loss": 0.5114,
      "step": 26480
    },
    {
      "epoch": 0.8418310619778265,
      "grad_norm": 1.3008265495300293,
      "learning_rate": 1.2983774121989888e-05,
      "loss": 0.5071,
      "step": 26500
    },
    {
      "epoch": 0.8424664061755456,
      "grad_norm": 0.7970487475395203,
      "learning_rate": 1.2882558813871204e-05,
      "loss": 0.4945,
      "step": 26520
    },
    {
      "epoch": 0.8431017503732647,
      "grad_norm": 0.7304345369338989,
      "learning_rate": 1.2781712396100287e-05,
      "loss": 0.4902,
      "step": 26540
    },
    {
      "epoch": 0.8437370945709838,
      "grad_norm": 0.9716693162918091,
      "learning_rate": 1.2681235295701488e-05,
      "loss": 0.4857,
      "step": 26560
    },
    {
      "epoch": 0.8443724387687029,
      "grad_norm": 0.9461120963096619,
      "learning_rate": 1.2581127938135328e-05,
      "loss": 0.5139,
      "step": 26580
    },
    {
      "epoch": 0.8450077829664221,
      "grad_norm": 0.8130011558532715,
      "learning_rate": 1.2481390747296717e-05,
      "loss": 0.4788,
      "step": 26600
    },
    {
      "epoch": 0.8456431271641411,
      "grad_norm": 0.959818959236145,
      "learning_rate": 1.2382024145513094e-05,
      "loss": 0.4808,
      "step": 26620
    },
    {
      "epoch": 0.8462784713618603,
      "grad_norm": 1.2069573402404785,
      "learning_rate": 1.2283028553542674e-05,
      "loss": 0.4692,
      "step": 26640
    },
    {
      "epoch": 0.8469138155595795,
      "grad_norm": 1.0251085758209229,
      "learning_rate": 1.2184404390572712e-05,
      "loss": 0.5106,
      "step": 26660
    },
    {
      "epoch": 0.8475491597572985,
      "grad_norm": 0.9423872828483582,
      "learning_rate": 1.2086152074217638e-05,
      "loss": 0.4881,
      "step": 26680
    },
    {
      "epoch": 0.8481845039550177,
      "grad_norm": 0.8245638608932495,
      "learning_rate": 1.1988272020517322e-05,
      "loss": 0.4606,
      "step": 26700
    },
    {
      "epoch": 0.8488198481527367,
      "grad_norm": 1.0099587440490723,
      "learning_rate": 1.1890764643935393e-05,
      "loss": 0.4976,
      "step": 26720
    },
    {
      "epoch": 0.8494551923504559,
      "grad_norm": 0.8285634517669678,
      "learning_rate": 1.1793630357357355e-05,
      "loss": 0.5057,
      "step": 26740
    },
    {
      "epoch": 0.850090536548175,
      "grad_norm": 0.9125322699546814,
      "learning_rate": 1.169686957208892e-05,
      "loss": 0.4856,
      "step": 26760
    },
    {
      "epoch": 0.8507258807458941,
      "grad_norm": 1.1413007974624634,
      "learning_rate": 1.1600482697854198e-05,
      "loss": 0.4916,
      "step": 26780
    },
    {
      "epoch": 0.8513612249436132,
      "grad_norm": 0.9246459603309631,
      "learning_rate": 1.1504470142794121e-05,
      "loss": 0.4807,
      "step": 26800
    },
    {
      "epoch": 0.8519965691413324,
      "grad_norm": 0.9050401449203491,
      "learning_rate": 1.140883231346449e-05,
      "loss": 0.4844,
      "step": 26820
    },
    {
      "epoch": 0.8526319133390514,
      "grad_norm": 0.8217797875404358,
      "learning_rate": 1.1313569614834408e-05,
      "loss": 0.4751,
      "step": 26840
    },
    {
      "epoch": 0.8532672575367706,
      "grad_norm": 1.0189076662063599,
      "learning_rate": 1.1218682450284545e-05,
      "loss": 0.4949,
      "step": 26860
    },
    {
      "epoch": 0.8539026017344896,
      "grad_norm": 0.7574889659881592,
      "learning_rate": 1.112417122160535e-05,
      "loss": 0.4738,
      "step": 26880
    },
    {
      "epoch": 0.8545379459322088,
      "grad_norm": 0.6649676561355591,
      "learning_rate": 1.1030036328995497e-05,
      "loss": 0.4859,
      "step": 26900
    },
    {
      "epoch": 0.8551732901299279,
      "grad_norm": 0.7144981622695923,
      "learning_rate": 1.0936278171060032e-05,
      "loss": 0.4799,
      "step": 26920
    },
    {
      "epoch": 0.855808634327647,
      "grad_norm": 0.9074038863182068,
      "learning_rate": 1.0842897144808762e-05,
      "loss": 0.4951,
      "step": 26940
    },
    {
      "epoch": 0.8564439785253661,
      "grad_norm": 0.9271389842033386,
      "learning_rate": 1.0749893645654551e-05,
      "loss": 0.4692,
      "step": 26960
    },
    {
      "epoch": 0.8570793227230852,
      "grad_norm": 0.9277658462524414,
      "learning_rate": 1.0657268067411752e-05,
      "loss": 0.4711,
      "step": 26980
    },
    {
      "epoch": 0.8577146669208043,
      "grad_norm": 1.5766148567199707,
      "learning_rate": 1.0565020802294357e-05,
      "loss": 0.5081,
      "step": 27000
    },
    {
      "epoch": 0.8577146669208043,
      "eval_loss": 0.4444785416126251,
      "eval_runtime": 45.2678,
      "eval_samples_per_second": 59.711,
      "eval_steps_per_second": 29.867,
      "step": 27000
    },
    {
      "epoch": 0.8583500111185235,
      "grad_norm": 0.7567349076271057,
      "learning_rate": 1.0473152240914419e-05,
      "loss": 0.4671,
      "step": 27020
    },
    {
      "epoch": 0.8589853553162425,
      "grad_norm": 1.0230178833007812,
      "learning_rate": 1.0381662772280498e-05,
      "loss": 0.4874,
      "step": 27040
    },
    {
      "epoch": 0.8596206995139617,
      "grad_norm": 0.7454288005828857,
      "learning_rate": 1.0290552783795849e-05,
      "loss": 0.4825,
      "step": 27060
    },
    {
      "epoch": 0.8602560437116809,
      "grad_norm": 0.9813241958618164,
      "learning_rate": 1.0199822661256852e-05,
      "loss": 0.4785,
      "step": 27080
    },
    {
      "epoch": 0.8608913879093999,
      "grad_norm": 0.8269158005714417,
      "learning_rate": 1.0109472788851427e-05,
      "loss": 0.4797,
      "step": 27100
    },
    {
      "epoch": 0.861526732107119,
      "grad_norm": 0.8101191520690918,
      "learning_rate": 1.001950354915734e-05,
      "loss": 0.4735,
      "step": 27120
    },
    {
      "epoch": 0.8621620763048381,
      "grad_norm": 0.903421938419342,
      "learning_rate": 9.929915323140571e-06,
      "loss": 0.5,
      "step": 27140
    },
    {
      "epoch": 0.8627974205025573,
      "grad_norm": 0.7358487248420715,
      "learning_rate": 9.840708490153817e-06,
      "loss": 0.4799,
      "step": 27160
    },
    {
      "epoch": 0.8634327647002764,
      "grad_norm": 0.9838561415672302,
      "learning_rate": 9.751883427934717e-06,
      "loss": 0.506,
      "step": 27180
    },
    {
      "epoch": 0.8640681088979955,
      "grad_norm": 0.9448813796043396,
      "learning_rate": 9.66344051260436e-06,
      "loss": 0.4966,
      "step": 27200
    },
    {
      "epoch": 0.8647034530957146,
      "grad_norm": 1.111055612564087,
      "learning_rate": 9.575380118665733e-06,
      "loss": 0.5118,
      "step": 27220
    },
    {
      "epoch": 0.8653387972934338,
      "grad_norm": 0.968305230140686,
      "learning_rate": 9.487702619001992e-06,
      "loss": 0.5002,
      "step": 27240
    },
    {
      "epoch": 0.8659741414911528,
      "grad_norm": 0.8771995902061462,
      "learning_rate": 9.400408384874992e-06,
      "loss": 0.497,
      "step": 27260
    },
    {
      "epoch": 0.866609485688872,
      "grad_norm": 1.0422018766403198,
      "learning_rate": 9.31349778592373e-06,
      "loss": 0.5081,
      "step": 27280
    },
    {
      "epoch": 0.867244829886591,
      "grad_norm": 0.8950514197349548,
      "learning_rate": 9.22697119016267e-06,
      "loss": 0.4957,
      "step": 27300
    },
    {
      "epoch": 0.8678801740843102,
      "grad_norm": 0.8093190789222717,
      "learning_rate": 9.140828963980297e-06,
      "loss": 0.4667,
      "step": 27320
    },
    {
      "epoch": 0.8685155182820293,
      "grad_norm": 0.8465502262115479,
      "learning_rate": 9.055071472137466e-06,
      "loss": 0.4913,
      "step": 27340
    },
    {
      "epoch": 0.8691508624797484,
      "grad_norm": 0.8349893093109131,
      "learning_rate": 8.969699077766014e-06,
      "loss": 0.4738,
      "step": 27360
    },
    {
      "epoch": 0.8697862066774675,
      "grad_norm": 0.831910252571106,
      "learning_rate": 8.884712142367024e-06,
      "loss": 0.4923,
      "step": 27380
    },
    {
      "epoch": 0.8704215508751866,
      "grad_norm": 0.9581566452980042,
      "learning_rate": 8.80011102580941e-06,
      "loss": 0.4856,
      "step": 27400
    },
    {
      "epoch": 0.8710568950729057,
      "grad_norm": 0.823250412940979,
      "learning_rate": 8.720097656085246e-06,
      "loss": 0.4886,
      "step": 27420
    },
    {
      "epoch": 0.8716922392706249,
      "grad_norm": 0.988389253616333,
      "learning_rate": 8.636249915153039e-06,
      "loss": 0.4946,
      "step": 27440
    },
    {
      "epoch": 0.8723275834683439,
      "grad_norm": 0.85055011510849,
      "learning_rate": 8.55695289500451e-06,
      "loss": 0.4885,
      "step": 27460
    },
    {
      "epoch": 0.8729629276660631,
      "grad_norm": 0.9092792272567749,
      "learning_rate": 8.473859879755397e-06,
      "loss": 0.4631,
      "step": 27480
    },
    {
      "epoch": 0.8735982718637822,
      "grad_norm": 0.930949330329895,
      "learning_rate": 8.39115442306171e-06,
      "loss": 0.4955,
      "step": 27500
    },
    {
      "epoch": 0.8742336160615013,
      "grad_norm": 0.7822802066802979,
      "learning_rate": 8.308836875131665e-06,
      "loss": 0.4842,
      "step": 27520
    },
    {
      "epoch": 0.8748689602592205,
      "grad_norm": 0.7877179384231567,
      "learning_rate": 8.22690758453094e-06,
      "loss": 0.5006,
      "step": 27540
    },
    {
      "epoch": 0.8755043044569395,
      "grad_norm": 0.9965065717697144,
      "learning_rate": 8.145366898181139e-06,
      "loss": 0.4866,
      "step": 27560
    },
    {
      "epoch": 0.8761396486546587,
      "grad_norm": 1.1015229225158691,
      "learning_rate": 8.064215161358402e-06,
      "loss": 0.5203,
      "step": 27580
    },
    {
      "epoch": 0.8767749928523778,
      "grad_norm": 0.7929244637489319,
      "learning_rate": 7.983452717691852e-06,
      "loss": 0.477,
      "step": 27600
    },
    {
      "epoch": 0.8774103370500969,
      "grad_norm": 1.0685256719589233,
      "learning_rate": 7.903079909162258e-06,
      "loss": 0.5385,
      "step": 27620
    },
    {
      "epoch": 0.878045681247816,
      "grad_norm": 1.0020925998687744,
      "learning_rate": 7.82309707610046e-06,
      "loss": 0.5061,
      "step": 27640
    },
    {
      "epoch": 0.8786810254455352,
      "grad_norm": 0.8348806500434875,
      "learning_rate": 7.743504557185976e-06,
      "loss": 0.505,
      "step": 27660
    },
    {
      "epoch": 0.8793163696432542,
      "grad_norm": 0.8327703475952148,
      "learning_rate": 7.664302689445635e-06,
      "loss": 0.4633,
      "step": 27680
    },
    {
      "epoch": 0.8799517138409734,
      "grad_norm": 0.9524950385093689,
      "learning_rate": 7.5854918082520435e-06,
      "loss": 0.4859,
      "step": 27700
    },
    {
      "epoch": 0.8805870580386924,
      "grad_norm": 0.8677568435668945,
      "learning_rate": 7.507072247322211e-06,
      "loss": 0.4832,
      "step": 27720
    },
    {
      "epoch": 0.8812224022364116,
      "grad_norm": 0.9326565265655518,
      "learning_rate": 7.429044338716196e-06,
      "loss": 0.493,
      "step": 27740
    },
    {
      "epoch": 0.8818577464341307,
      "grad_norm": 0.7510032057762146,
      "learning_rate": 7.35140841283557e-06,
      "loss": 0.489,
      "step": 27760
    },
    {
      "epoch": 0.8824930906318498,
      "grad_norm": 0.7510486841201782,
      "learning_rate": 7.274164798422134e-06,
      "loss": 0.4741,
      "step": 27780
    },
    {
      "epoch": 0.8831284348295689,
      "grad_norm": 0.8744218945503235,
      "learning_rate": 7.197313822556462e-06,
      "loss": 0.4698,
      "step": 27800
    },
    {
      "epoch": 0.8837637790272881,
      "grad_norm": 0.7554096579551697,
      "learning_rate": 7.12085581065658e-06,
      "loss": 0.4561,
      "step": 27820
    },
    {
      "epoch": 0.8843991232250071,
      "grad_norm": 1.0702250003814697,
      "learning_rate": 7.044791086476499e-06,
      "loss": 0.5074,
      "step": 27840
    },
    {
      "epoch": 0.8850344674227263,
      "grad_norm": 1.2190712690353394,
      "learning_rate": 6.969119972104898e-06,
      "loss": 0.4873,
      "step": 27860
    },
    {
      "epoch": 0.8856698116204453,
      "grad_norm": 0.8235007524490356,
      "learning_rate": 6.893842787963789e-06,
      "loss": 0.4884,
      "step": 27880
    },
    {
      "epoch": 0.8863051558181645,
      "grad_norm": 0.8809916973114014,
      "learning_rate": 6.818959852807083e-06,
      "loss": 0.4746,
      "step": 27900
    },
    {
      "epoch": 0.8869405000158836,
      "grad_norm": 0.8362717628479004,
      "learning_rate": 6.744471483719306e-06,
      "loss": 0.5139,
      "step": 27920
    },
    {
      "epoch": 0.8875758442136027,
      "grad_norm": 0.9398446083068848,
      "learning_rate": 6.67037799611423e-06,
      "loss": 0.5002,
      "step": 27940
    },
    {
      "epoch": 0.8882111884113219,
      "grad_norm": 0.750577449798584,
      "learning_rate": 6.596679703733544e-06,
      "loss": 0.4965,
      "step": 27960
    },
    {
      "epoch": 0.8888465326090409,
      "grad_norm": 1.0199640989303589,
      "learning_rate": 6.523376918645474e-06,
      "loss": 0.5101,
      "step": 27980
    },
    {
      "epoch": 0.88948187680676,
      "grad_norm": 0.8302307724952698,
      "learning_rate": 6.4504699512435985e-06,
      "loss": 0.4608,
      "step": 28000
    },
    {
      "epoch": 0.88948187680676,
      "eval_loss": 0.4442509412765503,
      "eval_runtime": 44.8835,
      "eval_samples_per_second": 60.223,
      "eval_steps_per_second": 30.122,
      "step": 28000
    },
    {
      "epoch": 0.8901172210044792,
      "grad_norm": 0.7648799419403076,
      "learning_rate": 6.377959110245357e-06,
      "loss": 0.4704,
      "step": 28020
    },
    {
      "epoch": 0.8907525652021983,
      "grad_norm": 0.8950293064117432,
      "learning_rate": 6.305844702690878e-06,
      "loss": 0.4906,
      "step": 28040
    },
    {
      "epoch": 0.8913879093999174,
      "grad_norm": 0.9124616384506226,
      "learning_rate": 6.234127033941628e-06,
      "loss": 0.4939,
      "step": 28060
    },
    {
      "epoch": 0.8920232535976366,
      "grad_norm": 0.8970253467559814,
      "learning_rate": 6.1628064076791e-06,
      "loss": 0.5088,
      "step": 28080
    },
    {
      "epoch": 0.8926585977953556,
      "grad_norm": 0.9791019558906555,
      "learning_rate": 6.091883125903575e-06,
      "loss": 0.4613,
      "step": 28100
    },
    {
      "epoch": 0.8932939419930748,
      "grad_norm": 1.3384908437728882,
      "learning_rate": 6.021357488932789e-06,
      "loss": 0.4737,
      "step": 28120
    },
    {
      "epoch": 0.8939292861907938,
      "grad_norm": 1.076692819595337,
      "learning_rate": 5.951229795400726e-06,
      "loss": 0.5094,
      "step": 28140
    },
    {
      "epoch": 0.894564630388513,
      "grad_norm": 0.9772495031356812,
      "learning_rate": 5.881500342256285e-06,
      "loss": 0.4791,
      "step": 28160
    },
    {
      "epoch": 0.8951999745862321,
      "grad_norm": 0.946626603603363,
      "learning_rate": 5.8121694247620485e-06,
      "loss": 0.4843,
      "step": 28180
    },
    {
      "epoch": 0.8958353187839512,
      "grad_norm": 0.9328265190124512,
      "learning_rate": 5.74323733649309e-06,
      "loss": 0.4822,
      "step": 28200
    },
    {
      "epoch": 0.8964706629816703,
      "grad_norm": 0.7450932264328003,
      "learning_rate": 5.674704369335637e-06,
      "loss": 0.4746,
      "step": 28220
    },
    {
      "epoch": 0.8971060071793895,
      "grad_norm": 1.0023432970046997,
      "learning_rate": 5.606570813485856e-06,
      "loss": 0.4941,
      "step": 28240
    },
    {
      "epoch": 0.8977413513771085,
      "grad_norm": 0.8717949986457825,
      "learning_rate": 5.538836957448712e-06,
      "loss": 0.4801,
      "step": 28260
    },
    {
      "epoch": 0.8983766955748277,
      "grad_norm": 0.8665459156036377,
      "learning_rate": 5.474860277416504e-06,
      "loss": 0.4782,
      "step": 28280
    },
    {
      "epoch": 0.8990120397725467,
      "grad_norm": 0.8660995364189148,
      "learning_rate": 5.407906659415618e-06,
      "loss": 0.4788,
      "step": 28300
    },
    {
      "epoch": 0.8996473839702659,
      "grad_norm": 0.9390355944633484,
      "learning_rate": 5.341353582451425e-06,
      "loss": 0.478,
      "step": 28320
    },
    {
      "epoch": 0.900282728167985,
      "grad_norm": 0.8287180662155151,
      "learning_rate": 5.275201328336477e-06,
      "loss": 0.4846,
      "step": 28340
    },
    {
      "epoch": 0.9009180723657041,
      "grad_norm": 0.8496334552764893,
      "learning_rate": 5.209450177186081e-06,
      "loss": 0.4838,
      "step": 28360
    },
    {
      "epoch": 0.9015534165634232,
      "grad_norm": 0.9892422556877136,
      "learning_rate": 5.144100407417063e-06,
      "loss": 0.4854,
      "step": 28380
    },
    {
      "epoch": 0.9021887607611423,
      "grad_norm": 0.9813452363014221,
      "learning_rate": 5.0791522957467365e-06,
      "loss": 0.4916,
      "step": 28400
    },
    {
      "epoch": 0.9028241049588615,
      "grad_norm": 0.9126195907592773,
      "learning_rate": 5.014606117191545e-06,
      "loss": 0.4949,
      "step": 28420
    },
    {
      "epoch": 0.9034594491565806,
      "grad_norm": 0.8669445514678955,
      "learning_rate": 4.950462145066015e-06,
      "loss": 0.482,
      "step": 28440
    },
    {
      "epoch": 0.9040947933542997,
      "grad_norm": 0.9803065657615662,
      "learning_rate": 4.886720650981569e-06,
      "loss": 0.5025,
      "step": 28460
    },
    {
      "epoch": 0.9047301375520188,
      "grad_norm": 0.9414586424827576,
      "learning_rate": 4.823381904845392e-06,
      "loss": 0.4856,
      "step": 28480
    },
    {
      "epoch": 0.905365481749738,
      "grad_norm": 0.9295367002487183,
      "learning_rate": 4.760446174859224e-06,
      "loss": 0.4876,
      "step": 28500
    },
    {
      "epoch": 0.906000825947457,
      "grad_norm": 0.8859279751777649,
      "learning_rate": 4.697913727518332e-06,
      "loss": 0.5152,
      "step": 28520
    },
    {
      "epoch": 0.9066361701451762,
      "grad_norm": 0.7441398501396179,
      "learning_rate": 4.63578482761029e-06,
      "loss": 0.4787,
      "step": 28540
    },
    {
      "epoch": 0.9072715143428952,
      "grad_norm": 1.459954023361206,
      "learning_rate": 4.574059738213876e-06,
      "loss": 0.4813,
      "step": 28560
    },
    {
      "epoch": 0.9079068585406144,
      "grad_norm": 0.9451243281364441,
      "learning_rate": 4.512738720698018e-06,
      "loss": 0.4835,
      "step": 28580
    },
    {
      "epoch": 0.9085422027383335,
      "grad_norm": 0.8990492820739746,
      "learning_rate": 4.451822034720587e-06,
      "loss": 0.4811,
      "step": 28600
    },
    {
      "epoch": 0.9091775469360526,
      "grad_norm": 0.7530508637428284,
      "learning_rate": 4.3913099382273835e-06,
      "loss": 0.5,
      "step": 28620
    },
    {
      "epoch": 0.9098128911337717,
      "grad_norm": 0.8113830089569092,
      "learning_rate": 4.331202687451019e-06,
      "loss": 0.5075,
      "step": 28640
    },
    {
      "epoch": 0.9104482353314909,
      "grad_norm": 0.8615418672561646,
      "learning_rate": 4.2715005369097895e-06,
      "loss": 0.5152,
      "step": 28660
    },
    {
      "epoch": 0.9110835795292099,
      "grad_norm": 0.8459773659706116,
      "learning_rate": 4.212203739406673e-06,
      "loss": 0.4804,
      "step": 28680
    },
    {
      "epoch": 0.9117189237269291,
      "grad_norm": 0.8821284770965576,
      "learning_rate": 4.153312546028199e-06,
      "loss": 0.5311,
      "step": 28700
    },
    {
      "epoch": 0.9123542679246481,
      "grad_norm": 1.0187216997146606,
      "learning_rate": 4.0948272061434035e-06,
      "loss": 0.4632,
      "step": 28720
    },
    {
      "epoch": 0.9129896121223673,
      "grad_norm": 0.9274182915687561,
      "learning_rate": 4.036747967402788e-06,
      "loss": 0.4832,
      "step": 28740
    },
    {
      "epoch": 0.9136249563200864,
      "grad_norm": 0.7573745846748352,
      "learning_rate": 3.979075075737226e-06,
      "loss": 0.4905,
      "step": 28760
    },
    {
      "epoch": 0.9142603005178055,
      "grad_norm": 0.9005789160728455,
      "learning_rate": 3.921808775357027e-06,
      "loss": 0.5114,
      "step": 28780
    },
    {
      "epoch": 0.9148956447155246,
      "grad_norm": 0.9073104858398438,
      "learning_rate": 3.864949308750743e-06,
      "loss": 0.5018,
      "step": 28800
    },
    {
      "epoch": 0.9155309889132438,
      "grad_norm": 0.7230907678604126,
      "learning_rate": 3.808496916684268e-06,
      "loss": 0.4954,
      "step": 28820
    },
    {
      "epoch": 0.9161663331109628,
      "grad_norm": 0.7139384746551514,
      "learning_rate": 3.7524518381997885e-06,
      "loss": 0.464,
      "step": 28840
    },
    {
      "epoch": 0.916801677308682,
      "grad_norm": 0.8710399866104126,
      "learning_rate": 3.696814310614749e-06,
      "loss": 0.5048,
      "step": 28860
    },
    {
      "epoch": 0.917437021506401,
      "grad_norm": 0.87566739320755,
      "learning_rate": 3.6415845695208505e-06,
      "loss": 0.484,
      "step": 28880
    },
    {
      "epoch": 0.9180723657041202,
      "grad_norm": 0.9447526335716248,
      "learning_rate": 3.586762848783076e-06,
      "loss": 0.5032,
      "step": 28900
    },
    {
      "epoch": 0.9187077099018394,
      "grad_norm": 0.7784162759780884,
      "learning_rate": 3.53234938053868e-06,
      "loss": 0.4451,
      "step": 28920
    },
    {
      "epoch": 0.9193430540995584,
      "grad_norm": 0.9225743412971497,
      "learning_rate": 3.478344395196198e-06,
      "loss": 0.4745,
      "step": 28940
    },
    {
      "epoch": 0.9199783982972776,
      "grad_norm": 0.9712013602256775,
      "learning_rate": 3.4247481214345177e-06,
      "loss": 0.4956,
      "step": 28960
    },
    {
      "epoch": 0.9206137424949966,
      "grad_norm": 1.2805237770080566,
      "learning_rate": 3.371560786201855e-06,
      "loss": 0.4971,
      "step": 28980
    },
    {
      "epoch": 0.9212490866927158,
      "grad_norm": 0.7866525053977966,
      "learning_rate": 3.3187826147147994e-06,
      "loss": 0.497,
      "step": 29000
    },
    {
      "epoch": 0.9212490866927158,
      "eval_loss": 0.44399821758270264,
      "eval_runtime": 45.0357,
      "eval_samples_per_second": 60.019,
      "eval_steps_per_second": 30.021,
      "step": 29000
    },
    {
      "epoch": 0.9218844308904349,
      "grad_norm": 0.7901077270507812,
      "learning_rate": 3.2664138304574153e-06,
      "loss": 0.514,
      "step": 29020
    },
    {
      "epoch": 0.922519775088154,
      "grad_norm": 1.0464386940002441,
      "learning_rate": 3.2144546551802323e-06,
      "loss": 0.5042,
      "step": 29040
    },
    {
      "epoch": 0.9231551192858731,
      "grad_norm": 0.8520443439483643,
      "learning_rate": 3.162905308899322e-06,
      "loss": 0.4858,
      "step": 29060
    },
    {
      "epoch": 0.9237904634835923,
      "grad_norm": 0.92030268907547,
      "learning_rate": 3.1117660098953895e-06,
      "loss": 0.4766,
      "step": 29080
    },
    {
      "epoch": 0.9244258076813113,
      "grad_norm": 0.7019485235214233,
      "learning_rate": 3.06103697471285e-06,
      "loss": 0.4903,
      "step": 29100
    },
    {
      "epoch": 0.9250611518790305,
      "grad_norm": 1.3560097217559814,
      "learning_rate": 3.0107184181588643e-06,
      "loss": 0.5125,
      "step": 29120
    },
    {
      "epoch": 0.9256964960767495,
      "grad_norm": 0.9616526365280151,
      "learning_rate": 2.960810553302462e-06,
      "loss": 0.512,
      "step": 29140
    },
    {
      "epoch": 0.9263318402744687,
      "grad_norm": 1.1742409467697144,
      "learning_rate": 2.9113135914736856e-06,
      "loss": 0.5007,
      "step": 29160
    },
    {
      "epoch": 0.9269671844721878,
      "grad_norm": 0.8712571263313293,
      "learning_rate": 2.8622277422625907e-06,
      "loss": 0.4717,
      "step": 29180
    },
    {
      "epoch": 0.9276025286699069,
      "grad_norm": 0.8578605055809021,
      "learning_rate": 2.8135532135184384e-06,
      "loss": 0.4989,
      "step": 29200
    },
    {
      "epoch": 0.928237872867626,
      "grad_norm": 0.8551231026649475,
      "learning_rate": 2.7652902113488143e-06,
      "loss": 0.4825,
      "step": 29220
    },
    {
      "epoch": 0.9288732170653452,
      "grad_norm": 0.82204669713974,
      "learning_rate": 2.7174389401186996e-06,
      "loss": 0.4702,
      "step": 29240
    },
    {
      "epoch": 0.9295085612630642,
      "grad_norm": 0.9263904690742493,
      "learning_rate": 2.6699996024496575e-06,
      "loss": 0.4996,
      "step": 29260
    },
    {
      "epoch": 0.9301439054607834,
      "grad_norm": 1.037817120552063,
      "learning_rate": 2.6229723992189704e-06,
      "loss": 0.4986,
      "step": 29280
    },
    {
      "epoch": 0.9307792496585024,
      "grad_norm": 1.0528874397277832,
      "learning_rate": 2.5763575295587593e-06,
      "loss": 0.4794,
      "step": 29300
    },
    {
      "epoch": 0.9314145938562216,
      "grad_norm": 0.8765133619308472,
      "learning_rate": 2.5301551908551545e-06,
      "loss": 0.4878,
      "step": 29320
    },
    {
      "epoch": 0.9320499380539408,
      "grad_norm": 0.8322685956954956,
      "learning_rate": 2.484365578747494e-06,
      "loss": 0.4945,
      "step": 29340
    },
    {
      "epoch": 0.9326852822516598,
      "grad_norm": 0.8344667553901672,
      "learning_rate": 2.438988887127436e-06,
      "loss": 0.4981,
      "step": 29360
    },
    {
      "epoch": 0.933320626449379,
      "grad_norm": 0.8750690817832947,
      "learning_rate": 2.3940253081381703e-06,
      "loss": 0.4969,
      "step": 29380
    },
    {
      "epoch": 0.933955970647098,
      "grad_norm": 0.808814287185669,
      "learning_rate": 2.3494750321736093e-06,
      "loss": 0.4623,
      "step": 29400
    },
    {
      "epoch": 0.9345913148448172,
      "grad_norm": 0.9626306891441345,
      "learning_rate": 2.3053382478775754e-06,
      "loss": 0.5028,
      "step": 29420
    },
    {
      "epoch": 0.9352266590425363,
      "grad_norm": 0.9727978706359863,
      "learning_rate": 2.261615142143003e-06,
      "loss": 0.5059,
      "step": 29440
    },
    {
      "epoch": 0.9358620032402554,
      "grad_norm": 0.8926533460617065,
      "learning_rate": 2.2183059001111174e-06,
      "loss": 0.4764,
      "step": 29460
    },
    {
      "epoch": 0.9364973474379745,
      "grad_norm": 1.0506230592727661,
      "learning_rate": 2.1754107051707218e-06,
      "loss": 0.5069,
      "step": 29480
    },
    {
      "epoch": 0.9371326916356937,
      "grad_norm": 0.7190736532211304,
      "learning_rate": 2.1329297389573565e-06,
      "loss": 0.49,
      "step": 29500
    },
    {
      "epoch": 0.9377680358334127,
      "grad_norm": 0.7786980867385864,
      "learning_rate": 2.09086318135252e-06,
      "loss": 0.4766,
      "step": 29520
    },
    {
      "epoch": 0.9384033800311319,
      "grad_norm": 0.8696832060813904,
      "learning_rate": 2.049211210483004e-06,
      "loss": 0.4959,
      "step": 29540
    },
    {
      "epoch": 0.9390387242288509,
      "grad_norm": 0.7167271375656128,
      "learning_rate": 2.0079740027200144e-06,
      "loss": 0.4927,
      "step": 29560
    },
    {
      "epoch": 0.9396740684265701,
      "grad_norm": 0.868259072303772,
      "learning_rate": 1.967151732678518e-06,
      "loss": 0.4788,
      "step": 29580
    },
    {
      "epoch": 0.9403094126242892,
      "grad_norm": 0.8658266663551331,
      "learning_rate": 1.9267445732164325e-06,
      "loss": 0.4919,
      "step": 29600
    },
    {
      "epoch": 0.9409447568220083,
      "grad_norm": 1.010276436805725,
      "learning_rate": 1.8867526954339688e-06,
      "loss": 0.4811,
      "step": 29620
    },
    {
      "epoch": 0.9415801010197274,
      "grad_norm": 0.9376817941665649,
      "learning_rate": 1.8471762686728344e-06,
      "loss": 0.4723,
      "step": 29640
    },
    {
      "epoch": 0.9422154452174466,
      "grad_norm": 1.520297646522522,
      "learning_rate": 1.8080154605155996e-06,
      "loss": 0.5146,
      "step": 29660
    },
    {
      "epoch": 0.9428507894151656,
      "grad_norm": 0.8532717227935791,
      "learning_rate": 1.7692704367848756e-06,
      "loss": 0.4556,
      "step": 29680
    },
    {
      "epoch": 0.9434861336128848,
      "grad_norm": 1.069378137588501,
      "learning_rate": 1.730941361542704e-06,
      "loss": 0.4789,
      "step": 29700
    },
    {
      "epoch": 0.9441214778106038,
      "grad_norm": 0.8771205544471741,
      "learning_rate": 1.6930283970898574e-06,
      "loss": 0.4819,
      "step": 29720
    },
    {
      "epoch": 0.944756822008323,
      "grad_norm": 0.8729512095451355,
      "learning_rate": 1.6555317039650852e-06,
      "loss": 0.4792,
      "step": 29740
    },
    {
      "epoch": 0.9453921662060422,
      "grad_norm": 0.8724381923675537,
      "learning_rate": 1.6184514409444795e-06,
      "loss": 0.4726,
      "step": 29760
    },
    {
      "epoch": 0.9460275104037612,
      "grad_norm": 0.9022035598754883,
      "learning_rate": 1.5817877650408541e-06,
      "loss": 0.4891,
      "step": 29780
    },
    {
      "epoch": 0.9466628546014804,
      "grad_norm": 1.003596544265747,
      "learning_rate": 1.5455408315029562e-06,
      "loss": 0.4974,
      "step": 29800
    },
    {
      "epoch": 0.9472981987991995,
      "grad_norm": 0.8569382429122925,
      "learning_rate": 1.5097107938149113e-06,
      "loss": 0.4781,
      "step": 29820
    },
    {
      "epoch": 0.9479335429969186,
      "grad_norm": 0.9094131588935852,
      "learning_rate": 1.4742978036955457e-06,
      "loss": 0.5155,
      "step": 29840
    },
    {
      "epoch": 0.9485688871946377,
      "grad_norm": 1.0451712608337402,
      "learning_rate": 1.4393020110977206e-06,
      "loss": 0.4895,
      "step": 29860
    },
    {
      "epoch": 0.9492042313923568,
      "grad_norm": 1.2386709451675415,
      "learning_rate": 1.4047235642077217e-06,
      "loss": 0.4702,
      "step": 29880
    },
    {
      "epoch": 0.9498395755900759,
      "grad_norm": 0.966143786907196,
      "learning_rate": 1.3705626094446256e-06,
      "loss": 0.4962,
      "step": 29900
    },
    {
      "epoch": 0.9504749197877951,
      "grad_norm": 0.9544230103492737,
      "learning_rate": 1.33681929145969e-06,
      "loss": 0.4788,
      "step": 29920
    },
    {
      "epoch": 0.9511102639855141,
      "grad_norm": 0.8583151698112488,
      "learning_rate": 1.3034937531357095e-06,
      "loss": 0.477,
      "step": 29940
    },
    {
      "epoch": 0.9517456081832333,
      "grad_norm": 0.8361521363258362,
      "learning_rate": 1.270586135586427e-06,
      "loss": 0.5162,
      "step": 29960
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 1.0520914793014526,
      "learning_rate": 1.2380965781559783e-06,
      "loss": 0.4762,
      "step": 29980
    },
    {
      "epoch": 0.9530162965786715,
      "grad_norm": 0.8727782964706421,
      "learning_rate": 1.2060252184182386e-06,
      "loss": 0.4929,
      "step": 30000
    },
    {
      "epoch": 0.9530162965786715,
      "eval_loss": 0.443807452917099,
      "eval_runtime": 44.5933,
      "eval_samples_per_second": 60.614,
      "eval_steps_per_second": 30.318,
      "step": 30000
    },
    {
      "epoch": 0.9536516407763906,
      "grad_norm": 0.7989442944526672,
      "learning_rate": 1.174372192176254e-06,
      "loss": 0.4932,
      "step": 30020
    },
    {
      "epoch": 0.9542869849741097,
      "grad_norm": 0.7544863224029541,
      "learning_rate": 1.1431376334616994e-06,
      "loss": 0.482,
      "step": 30040
    },
    {
      "epoch": 0.9549223291718288,
      "grad_norm": 0.8897516131401062,
      "learning_rate": 1.1123216745342779e-06,
      "loss": 0.4898,
      "step": 30060
    },
    {
      "epoch": 0.955557673369548,
      "grad_norm": 0.8291769027709961,
      "learning_rate": 1.0819244458811773e-06,
      "loss": 0.5021,
      "step": 30080
    },
    {
      "epoch": 0.956193017567267,
      "grad_norm": 0.8413028717041016,
      "learning_rate": 1.0519460762165144e-06,
      "loss": 0.4762,
      "step": 30100
    },
    {
      "epoch": 0.9568283617649862,
      "grad_norm": 0.9216207265853882,
      "learning_rate": 1.0223866924807924e-06,
      "loss": 0.4869,
      "step": 30120
    },
    {
      "epoch": 0.9574637059627052,
      "grad_norm": 0.8935249447822571,
      "learning_rate": 9.932464198403325e-07,
      "loss": 0.4928,
      "step": 30140
    },
    {
      "epoch": 0.9580990501604244,
      "grad_norm": 0.7496423721313477,
      "learning_rate": 9.645253816867983e-07,
      "loss": 0.5266,
      "step": 30160
    },
    {
      "epoch": 0.9587343943581436,
      "grad_norm": 0.9738262295722961,
      "learning_rate": 9.362236996366514e-07,
      "loss": 0.4735,
      "step": 30180
    },
    {
      "epoch": 0.9593697385558626,
      "grad_norm": 0.9249958395957947,
      "learning_rate": 9.083414935305956e-07,
      "loss": 0.4706,
      "step": 30200
    },
    {
      "epoch": 0.9600050827535818,
      "grad_norm": 1.0667359828948975,
      "learning_rate": 8.808788814331448e-07,
      "loss": 0.4721,
      "step": 30220
    },
    {
      "epoch": 0.9606404269513009,
      "grad_norm": 0.8088135123252869,
      "learning_rate": 8.53835979632056e-07,
      "loss": 0.4884,
      "step": 30240
    },
    {
      "epoch": 0.96127577114902,
      "grad_norm": 0.9164936542510986,
      "learning_rate": 8.272129026378639e-07,
      "loss": 0.5022,
      "step": 30260
    },
    {
      "epoch": 0.9619111153467391,
      "grad_norm": 0.7835588455200195,
      "learning_rate": 8.010097631834245e-07,
      "loss": 0.4707,
      "step": 30280
    },
    {
      "epoch": 0.9625464595444582,
      "grad_norm": 1.2730233669281006,
      "learning_rate": 7.752266722233614e-07,
      "loss": 0.4795,
      "step": 30300
    },
    {
      "epoch": 0.9631818037421773,
      "grad_norm": 0.9977156519889832,
      "learning_rate": 7.511219051883567e-07,
      "loss": 0.5209,
      "step": 30320
    },
    {
      "epoch": 0.9638171479398965,
      "grad_norm": 0.941656231880188,
      "learning_rate": 7.26158221189377e-07,
      "loss": 0.4747,
      "step": 30340
    },
    {
      "epoch": 0.9644524921376155,
      "grad_norm": 0.7258419990539551,
      "learning_rate": 7.028320832731084e-07,
      "loss": 0.4961,
      "step": 30360
    },
    {
      "epoch": 0.9650878363353347,
      "grad_norm": 0.974557638168335,
      "learning_rate": 6.786882081830093e-07,
      "loss": 0.4559,
      "step": 30380
    },
    {
      "epoch": 0.9657231805330537,
      "grad_norm": 0.973461925983429,
      "learning_rate": 6.549648995460511e-07,
      "loss": 0.4931,
      "step": 30400
    },
    {
      "epoch": 0.9663585247307729,
      "grad_norm": 1.0066043138504028,
      "learning_rate": 6.31662257816279e-07,
      "loss": 0.4901,
      "step": 30420
    },
    {
      "epoch": 0.966993868928492,
      "grad_norm": 0.9339585900306702,
      "learning_rate": 6.087803816664628e-07,
      "loss": 0.4697,
      "step": 30440
    },
    {
      "epoch": 0.9676292131262111,
      "grad_norm": 0.8802968859672546,
      "learning_rate": 5.863193679877088e-07,
      "loss": 0.4943,
      "step": 30460
    },
    {
      "epoch": 0.9682645573239302,
      "grad_norm": 0.7557999491691589,
      "learning_rate": 5.6427931188896e-07,
      "loss": 0.4761,
      "step": 30480
    },
    {
      "epoch": 0.9688999015216494,
      "grad_norm": 0.9139352440834045,
      "learning_rate": 5.426603066967295e-07,
      "loss": 0.476,
      "step": 30500
    },
    {
      "epoch": 0.9695352457193684,
      "grad_norm": 0.9125082492828369,
      "learning_rate": 5.21462443954579e-07,
      "loss": 0.4792,
      "step": 30520
    },
    {
      "epoch": 0.9701705899170876,
      "grad_norm": 0.9351817965507507,
      "learning_rate": 5.006858134228076e-07,
      "loss": 0.4976,
      "step": 30540
    },
    {
      "epoch": 0.9708059341148066,
      "grad_norm": 0.743870735168457,
      "learning_rate": 4.803305030780302e-07,
      "loss": 0.4695,
      "step": 30560
    },
    {
      "epoch": 0.9714412783125258,
      "grad_norm": 0.9468183517456055,
      "learning_rate": 4.603965991128445e-07,
      "loss": 0.5027,
      "step": 30580
    },
    {
      "epoch": 0.972076622510245,
      "grad_norm": 1.1194064617156982,
      "learning_rate": 4.408841859354307e-07,
      "loss": 0.5146,
      "step": 30600
    },
    {
      "epoch": 0.972711966707964,
      "grad_norm": 0.7916650176048279,
      "learning_rate": 4.21793346169197e-07,
      "loss": 0.4689,
      "step": 30620
    },
    {
      "epoch": 0.9733473109056832,
      "grad_norm": 0.9158383011817932,
      "learning_rate": 4.0312416065245717e-07,
      "loss": 0.5272,
      "step": 30640
    },
    {
      "epoch": 0.9739826551034023,
      "grad_norm": 0.8861019015312195,
      "learning_rate": 3.8487670843807555e-07,
      "loss": 0.4981,
      "step": 30660
    },
    {
      "epoch": 0.9746179993011214,
      "grad_norm": 1.01827871799469,
      "learning_rate": 3.670510667931004e-07,
      "loss": 0.5386,
      "step": 30680
    },
    {
      "epoch": 0.9752533434988405,
      "grad_norm": 0.9622276425361633,
      "learning_rate": 3.496473111984866e-07,
      "loss": 0.5135,
      "step": 30700
    },
    {
      "epoch": 0.9758886876965596,
      "grad_norm": 1.0768787860870361,
      "learning_rate": 3.326655153487512e-07,
      "loss": 0.4943,
      "step": 30720
    },
    {
      "epoch": 0.9765240318942787,
      "grad_norm": 1.2705291509628296,
      "learning_rate": 3.16105751151663e-07,
      "loss": 0.4924,
      "step": 30740
    },
    {
      "epoch": 0.9771593760919979,
      "grad_norm": 0.9354774951934814,
      "learning_rate": 2.99968088727931e-07,
      "loss": 0.4811,
      "step": 30760
    },
    {
      "epoch": 0.9777947202897169,
      "grad_norm": 0.8442774415016174,
      "learning_rate": 2.842525964109166e-07,
      "loss": 0.4652,
      "step": 30780
    },
    {
      "epoch": 0.9784300644874361,
      "grad_norm": 0.9658933281898499,
      "learning_rate": 2.6895934074635533e-07,
      "loss": 0.4767,
      "step": 30800
    },
    {
      "epoch": 0.9790654086851552,
      "grad_norm": 0.9930063486099243,
      "learning_rate": 2.5408838649204625e-07,
      "loss": 0.4791,
      "step": 30820
    },
    {
      "epoch": 0.9797007528828743,
      "grad_norm": 0.9439179301261902,
      "learning_rate": 2.396397966176078e-07,
      "loss": 0.4833,
      "step": 30840
    },
    {
      "epoch": 0.9803360970805934,
      "grad_norm": 0.8499469757080078,
      "learning_rate": 2.25613632304178e-07,
      "loss": 0.4969,
      "step": 30860
    },
    {
      "epoch": 0.9809714412783125,
      "grad_norm": 1.0228259563446045,
      "learning_rate": 2.1200995294420323e-07,
      "loss": 0.4709,
      "step": 30880
    },
    {
      "epoch": 0.9816067854760316,
      "grad_norm": 1.1045747995376587,
      "learning_rate": 1.988288161411389e-07,
      "loss": 0.4964,
      "step": 30900
    },
    {
      "epoch": 0.9822421296737508,
      "grad_norm": 0.8404049277305603,
      "learning_rate": 1.8607027770921602e-07,
      "loss": 0.5289,
      "step": 30920
    },
    {
      "epoch": 0.9828774738714698,
      "grad_norm": 0.8583685755729675,
      "learning_rate": 1.7373439167325257e-07,
      "loss": 0.4824,
      "step": 30940
    },
    {
      "epoch": 0.983512818069189,
      "grad_norm": 0.8340322375297546,
      "learning_rate": 1.6240682931759622e-07,
      "loss": 0.5276,
      "step": 30960
    },
    {
      "epoch": 0.984148162266908,
      "grad_norm": 0.717254638671875,
      "learning_rate": 1.508952640646988e-07,
      "loss": 0.4837,
      "step": 30980
    },
    {
      "epoch": 0.9847835064646272,
      "grad_norm": 0.7109520435333252,
      "learning_rate": 1.3980650015292806e-07,
      "loss": 0.4805,
      "step": 31000
    },
    {
      "epoch": 0.9847835064646272,
      "eval_loss": 0.4438159465789795,
      "eval_runtime": 44.826,
      "eval_samples_per_second": 60.3,
      "eval_steps_per_second": 30.161,
      "step": 31000
    },
    {
      "epoch": 0.9854188506623464,
      "grad_norm": 0.8632842302322388,
      "learning_rate": 1.2914058453658008e-07,
      "loss": 0.4787,
      "step": 31020
    },
    {
      "epoch": 0.9860541948600654,
      "grad_norm": 0.9302808046340942,
      "learning_rate": 1.1889756237943861e-07,
      "loss": 0.4733,
      "step": 31040
    },
    {
      "epoch": 0.9866895390577846,
      "grad_norm": 1.0309478044509888,
      "learning_rate": 1.090774770545755e-07,
      "loss": 0.498,
      "step": 31060
    },
    {
      "epoch": 0.9873248832555037,
      "grad_norm": 0.7432119250297546,
      "learning_rate": 9.968037014420616e-08,
      "loss": 0.4909,
      "step": 31080
    },
    {
      "epoch": 0.9879602274532228,
      "grad_norm": 1.0406357049942017,
      "learning_rate": 9.070628143946768e-08,
      "loss": 0.4913,
      "step": 31100
    },
    {
      "epoch": 0.9885955716509419,
      "grad_norm": 0.8807629346847534,
      "learning_rate": 8.215524894024107e-08,
      "loss": 0.4843,
      "step": 31120
    },
    {
      "epoch": 0.989230915848661,
      "grad_norm": 0.815077006816864,
      "learning_rate": 7.402730885507359e-08,
      "loss": 0.4877,
      "step": 31140
    },
    {
      "epoch": 0.9898662600463801,
      "grad_norm": 0.8051480054855347,
      "learning_rate": 6.632249560092341e-08,
      "loss": 0.489,
      "step": 31160
    },
    {
      "epoch": 0.9905016042440993,
      "grad_norm": 0.8251180648803711,
      "learning_rate": 5.9040841803081895e-08,
      "loss": 0.4763,
      "step": 31180
    },
    {
      "epoch": 0.9911369484418183,
      "grad_norm": 0.8782890439033508,
      "learning_rate": 5.218237829499595e-08,
      "loss": 0.5012,
      "step": 31200
    },
    {
      "epoch": 0.9917722926395375,
      "grad_norm": 0.9451269507408142,
      "learning_rate": 4.574713411816811e-08,
      "loss": 0.4765,
      "step": 31220
    },
    {
      "epoch": 0.9924076368372566,
      "grad_norm": 1.2340540885925293,
      "learning_rate": 3.973513652202332e-08,
      "loss": 0.4999,
      "step": 31240
    },
    {
      "epoch": 0.9930429810349757,
      "grad_norm": 1.0101948976516724,
      "learning_rate": 3.414641096376459e-08,
      "loss": 0.5118,
      "step": 31260
    },
    {
      "epoch": 0.9936783252326948,
      "grad_norm": 0.7806993722915649,
      "learning_rate": 2.8980981108317485e-08,
      "loss": 0.5068,
      "step": 31280
    },
    {
      "epoch": 0.9943136694304139,
      "grad_norm": 1.1223636865615845,
      "learning_rate": 2.4238868828196927e-08,
      "loss": 0.5182,
      "step": 31300
    },
    {
      "epoch": 0.994949013628133,
      "grad_norm": 0.8514977693557739,
      "learning_rate": 1.9920094203418336e-08,
      "loss": 0.5072,
      "step": 31320
    },
    {
      "epoch": 0.9955843578258522,
      "grad_norm": 1.1318073272705078,
      "learning_rate": 1.6024675521397747e-08,
      "loss": 0.4819,
      "step": 31340
    },
    {
      "epoch": 0.9962197020235712,
      "grad_norm": 0.9314286708831787,
      "learning_rate": 1.2552629276929573e-08,
      "loss": 0.4957,
      "step": 31360
    },
    {
      "epoch": 0.9968550462212904,
      "grad_norm": 0.7769533395767212,
      "learning_rate": 9.503970172031196e-09,
      "loss": 0.5149,
      "step": 31380
    },
    {
      "epoch": 0.9974903904190096,
      "grad_norm": 0.7601432800292969,
      "learning_rate": 6.878711115976266e-09,
      "loss": 0.4933,
      "step": 31400
    },
    {
      "epoch": 0.9981257346167286,
      "grad_norm": 0.987147331237793,
      "learning_rate": 4.6768632251614765e-09,
      "loss": 0.4693,
      "step": 31420
    },
    {
      "epoch": 0.9987610788144478,
      "grad_norm": 0.8807405829429626,
      "learning_rate": 2.8984358230954577e-09,
      "loss": 0.474,
      "step": 31440
    },
    {
      "epoch": 0.9993964230121668,
      "grad_norm": 0.7518433332443237,
      "learning_rate": 1.5434364403543733e-09,
      "loss": 0.5076,
      "step": 31460
    },
    {
      "epoch": 1.0,
      "step": 31479,
      "total_flos": 0.0,
      "train_loss": 0.3508217529017671,
      "train_runtime": 14676.7422,
      "train_samples_per_second": 68.633,
      "train_steps_per_second": 2.145
    }
  ],
  "logging_steps": 20,
  "max_steps": 31479,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}