378 lines
56 KiB
Plaintext
378 lines
56 KiB
Plaintext
nohup: ignoring input
|
|
[Episode 10] reward=-56496290.6 actor_loss=0.3227 critic_loss=121984741831.1111 entropy=4.2577 ent_coef=0.002000 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0977 front_blocked=0
|
|
[Episode 20] reward=-60611149.8 actor_loss=0.2760 critic_loss=128111700480.0000 entropy=4.2567 ent_coef=0.002000 approx_kl=0.0022 kl_stop=1 intervention_rate=0.1061 front_blocked=0
|
|
[Eval 20] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-647820.3 mean_steps=11.0
|
|
[Episode 30] reward=-63045787.2 actor_loss=0.1985 critic_loss=133642527350.1538 entropy=4.2622 ent_coef=0.002000 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Episode 40] reward=-53271802.8 actor_loss=0.1966 critic_loss=122983175850.6667 entropy=4.2682 ent_coef=0.002000 approx_kl=0.0003 kl_stop=0 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 40] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-400703.7 mean_steps=14.8
|
|
[Episode 50] reward=-40607038.8 actor_loss=0.2024 critic_loss=114208212204.3077 entropy=4.2755 ent_coef=0.002000 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 60] reward=-66300808.7 actor_loss=0.1677 critic_loss=130490473532.2353 entropy=4.2746 ent_coef=0.001999 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0957 front_blocked=0
|
|
[Eval 60] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544995.7 mean_steps=13.0
|
|
[Episode 70] reward=-54721000.1 actor_loss=0.1882 critic_loss=124476187079.1111 entropy=4.2771 ent_coef=0.001999 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Episode 80] reward=-58580284.5 actor_loss=0.1303 critic_loss=127428338036.3636 entropy=4.2760 ent_coef=0.001999 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 80] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-477856.8 mean_steps=14.4
|
|
[Episode 90] reward=-53411034.7 actor_loss=0.1630 critic_loss=123062245533.5385 entropy=4.2738 ent_coef=0.001999 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 100] reward=-57669159.9 actor_loss=0.1305 critic_loss=127343330840.3810 entropy=4.2730 ent_coef=0.001999 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-486597.1 mean_steps=14.5
|
|
[Episode 110] reward=-58670049.1 actor_loss=0.1631 critic_loss=127637036400.6400 entropy=4.2753 ent_coef=0.001999 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 120] reward=-58657999.4 actor_loss=0.1676 critic_loss=126264424903.1111 entropy=4.2769 ent_coef=0.001999 approx_kl=0.0004 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-531457.4 mean_steps=11.8
|
|
[Episode 130] reward=-54011232.6 actor_loss=0.2306 critic_loss=129836245772.1905 entropy=4.2733 ent_coef=0.001999 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 140] reward=-53796659.4 actor_loss=0.1312 critic_loss=121578362321.4545 entropy=4.2669 ent_coef=0.001999 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-630873.5 mean_steps=12.5
|
|
[Episode 150] reward=-56071938.5 actor_loss=0.1749 critic_loss=122299425751.0400 entropy=4.2607 ent_coef=0.001999 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 160] reward=-39575614.4 actor_loss=0.1461 critic_loss=114264811812.5714 entropy=4.2615 ent_coef=0.001999 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-457549.2 mean_steps=13.0
|
|
[Episode 170] reward=-49068837.5 actor_loss=0.1382 critic_loss=122159541816.8889 entropy=4.2583 ent_coef=0.001998 approx_kl=0.0006 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 180] reward=-51909510.4 actor_loss=0.1713 critic_loss=121776541455.0588 entropy=4.2599 ent_coef=0.001998 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-452093.1 mean_steps=12.8
|
|
[Episode 190] reward=-65770859.0 actor_loss=0.1666 critic_loss=133417641984.0000 entropy=4.2542 ent_coef=0.001998 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Episode 200] reward=-52114437.3 actor_loss=0.1067 critic_loss=126194403800.6154 entropy=4.2550 ent_coef=0.001998 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523031.4 mean_steps=12.7
|
|
[Episode 210] reward=-67170668.6 actor_loss=0.1605 critic_loss=137164337643.5200 entropy=4.2506 ent_coef=0.001998 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Episode 220] reward=-57076261.2 actor_loss=0.1871 critic_loss=125697804800.0000 entropy=4.2463 ent_coef=0.001998 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Eval 220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-520051.1 mean_steps=12.8
|
|
[Episode 230] reward=-47170080.9 actor_loss=0.1417 critic_loss=122106056919.5789 entropy=4.2453 ent_coef=0.001998 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 240] reward=-43889928.1 actor_loss=0.1452 critic_loss=119895527796.3636 entropy=4.2435 ent_coef=0.001998 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-359460.1 mean_steps=14.9
|
|
[Episode 250] reward=-56131162.1 actor_loss=0.1311 critic_loss=127673012224.0000 entropy=4.2424 ent_coef=0.001998 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 260] reward=-53532034.8 actor_loss=0.1215 critic_loss=124412613632.0000 entropy=4.2370 ent_coef=0.001998 approx_kl=0.0008 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474400.3 mean_steps=13.3
|
|
[Episode 270] reward=-48826672.0 actor_loss=0.1389 critic_loss=119337487473.7778 entropy=4.2359 ent_coef=0.001998 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 280] reward=-54100988.9 actor_loss=0.1315 critic_loss=127345608192.0000 entropy=4.2347 ent_coef=0.001997 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-382531.3 mean_steps=13.9
|
|
[Episode 290] reward=-49264785.9 actor_loss=0.0979 critic_loss=122663326720.0000 entropy=4.2324 ent_coef=0.001997 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 300] reward=-54752728.7 actor_loss=0.1382 critic_loss=126694122496.0000 entropy=4.2364 ent_coef=0.001997 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399911.3 mean_steps=14.1
|
|
[Episode 310] reward=-50761792.3 actor_loss=0.0935 critic_loss=121983381682.0870 entropy=4.2377 ent_coef=0.001997 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 320] reward=-53445862.1 actor_loss=0.1365 critic_loss=128295692533.7600 entropy=4.2352 ent_coef=0.001997 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500973.0 mean_steps=13.2
|
|
[Episode 330] reward=-41513419.5 actor_loss=0.1137 critic_loss=119585714858.6667 entropy=4.2309 ent_coef=0.001997 approx_kl=-0.0002 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 340] reward=-65895625.7 actor_loss=0.1175 critic_loss=133780068352.0000 entropy=4.2281 ent_coef=0.001997 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-366366.4 mean_steps=15.2
|
|
[Episode 350] reward=-45519469.2 actor_loss=0.0977 critic_loss=119207410967.2727 entropy=4.2217 ent_coef=0.001997 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Episode 360] reward=-64910313.7 actor_loss=0.1209 critic_loss=134707517952.0000 entropy=4.2215 ent_coef=0.001997 approx_kl=0.0001 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-580392.3 mean_steps=11.3
|
|
[Episode 370] reward=-53801891.0 actor_loss=0.1473 critic_loss=127576801280.0000 entropy=4.2188 ent_coef=0.001997 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 380] reward=-50334279.5 actor_loss=0.1223 critic_loss=127097515008.0000 entropy=4.2114 ent_coef=0.001997 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-378348.7 mean_steps=13.3
|
|
[Episode 390] reward=-45637583.1 actor_loss=0.1578 critic_loss=122987661191.5294 entropy=4.2050 ent_coef=0.001996 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 400] reward=-47589863.2 actor_loss=0.1152 critic_loss=124237224891.7333 entropy=4.2063 ent_coef=0.001996 approx_kl=0.0011 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 400] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-307878.5 mean_steps=15.2
|
|
[Episode 410] reward=-47522170.2 actor_loss=0.1285 critic_loss=120630910098.2857 entropy=4.2012 ent_coef=0.001996 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 420] reward=-56660727.8 actor_loss=0.1347 critic_loss=129353600133.5652 entropy=4.1991 ent_coef=0.001996 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543654.3 mean_steps=12.3
|
|
[Episode 430] reward=-49220509.2 actor_loss=0.1268 critic_loss=126829160740.5714 entropy=4.1986 ent_coef=0.001996 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 440] reward=-39745843.4 actor_loss=0.1162 critic_loss=112348769581.1765 entropy=4.1991 ent_coef=0.001996 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507364.4 mean_steps=13.3
|
|
[Episode 450] reward=-42625843.5 actor_loss=0.1240 critic_loss=117098818560.0000 entropy=4.2000 ent_coef=0.001996 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 460] reward=-46913927.1 actor_loss=0.1187 critic_loss=122213515673.6000 entropy=4.1933 ent_coef=0.001996 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531365.2 mean_steps=13.4
|
|
[Episode 470] reward=-48195139.3 actor_loss=0.1388 critic_loss=123647064291.5556 entropy=4.1904 ent_coef=0.001996 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 480] reward=-55448488.8 actor_loss=0.1157 critic_loss=128298313216.0000 entropy=4.1902 ent_coef=0.001996 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-377425.3 mean_steps=14.2
|
|
[Episode 490] reward=-41778985.0 actor_loss=0.1335 critic_loss=118513517025.8824 entropy=4.1908 ent_coef=0.001996 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 500] reward=-48928781.6 actor_loss=0.1302 critic_loss=123309213882.1818 entropy=4.1891 ent_coef=0.001996 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391654.5 mean_steps=15.1
|
|
[Episode 510] reward=-43458068.4 actor_loss=0.1581 critic_loss=121486024192.0000 entropy=4.1909 ent_coef=0.001995 approx_kl=0.0004 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 520] reward=-52251644.3 actor_loss=0.1273 critic_loss=127484853248.0000 entropy=4.1840 ent_coef=0.001995 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 520] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-274979.1 mean_steps=15.7
|
|
[Episode 530] reward=-58842297.9 actor_loss=0.1169 critic_loss=125155077324.8000 entropy=4.1807 ent_coef=0.001995 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 540] reward=-39927144.7 actor_loss=0.0876 critic_loss=114764991566.7692 entropy=4.1810 ent_coef=0.001995 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-463039.5 mean_steps=13.2
|
|
[Episode 550] reward=-51087729.9 actor_loss=0.1440 critic_loss=121499171840.0000 entropy=4.1828 ent_coef=0.001995 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 560] reward=-65464642.7 actor_loss=0.1427 critic_loss=130300248610.1333 entropy=4.1805 ent_coef=0.001995 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 560] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-317893.4 mean_steps=16.1
|
|
[Episode 570] reward=-49133908.1 actor_loss=0.1132 critic_loss=125379599213.7143 entropy=4.1813 ent_coef=0.001995 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 580] reward=-40342749.1 actor_loss=0.1368 critic_loss=117497769984.0000 entropy=4.1787 ent_coef=0.001995 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-372622.1 mean_steps=13.8
|
|
[Episode 590] reward=-44890760.1 actor_loss=0.1239 critic_loss=118587565056.0000 entropy=4.1790 ent_coef=0.001995 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 600] reward=-39771247.2 actor_loss=0.1222 critic_loss=118625696808.9600 entropy=4.1729 ent_coef=0.001995 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461029.0 mean_steps=14.1
|
|
[Episode 610] reward=-52821338.0 actor_loss=0.1201 critic_loss=126508147598.2222 entropy=4.1747 ent_coef=0.001995 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 620] reward=-52751099.6 actor_loss=0.1563 critic_loss=123214995456.0000 entropy=4.1809 ent_coef=0.001994 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-531685.5 mean_steps=12.9
|
|
[Episode 630] reward=-46774686.9 actor_loss=0.1100 critic_loss=120962139282.2857 entropy=4.1783 ent_coef=0.001994 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 640] reward=-39861458.7 actor_loss=0.1133 critic_loss=117034416810.6667 entropy=4.1753 ent_coef=0.001994 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 640] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-257016.6 mean_steps=16.4
|
|
[Episode 650] reward=-60172696.0 actor_loss=0.1197 critic_loss=130656425013.8947 entropy=4.1817 ent_coef=0.001994 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 660] reward=-51502239.9 actor_loss=0.1092 critic_loss=124843710756.5714 entropy=4.1842 ent_coef=0.001994 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430705.5 mean_steps=14.3
|
|
[Episode 670] reward=-50081370.1 actor_loss=0.1011 critic_loss=123590824755.2000 entropy=4.1866 ent_coef=0.001994 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 680] reward=-35144944.7 actor_loss=0.1539 critic_loss=115952061878.8571 entropy=4.1859 ent_coef=0.001994 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Eval 680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469951.5 mean_steps=13.8
|
|
[Episode 690] reward=-49470494.4 actor_loss=0.1061 critic_loss=121279436390.4000 entropy=4.1919 ent_coef=0.001994 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 700] reward=-49803531.2 actor_loss=0.1126 critic_loss=124686848819.2000 entropy=4.1900 ent_coef=0.001994 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496324.6 mean_steps=13.2
|
|
[Episode 710] reward=-57375754.0 actor_loss=0.1030 critic_loss=127451485525.3333 entropy=4.1884 ent_coef=0.001994 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 720] reward=-48658534.4 actor_loss=0.1113 critic_loss=120637472023.2727 entropy=4.1912 ent_coef=0.001994 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441811.7 mean_steps=14.6
|
|
[Episode 730] reward=-53449604.6 actor_loss=0.1281 critic_loss=121659839634.2857 entropy=4.1885 ent_coef=0.001993 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 740] reward=-46102688.2 actor_loss=0.1083 critic_loss=123121505621.3333 entropy=4.1850 ent_coef=0.001993 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-413772.3 mean_steps=13.6
|
|
[Episode 750] reward=-49454459.5 actor_loss=0.1071 critic_loss=123369788757.3333 entropy=4.1836 ent_coef=0.001993 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 760] reward=-59269030.5 actor_loss=0.1558 critic_loss=129341459114.6667 entropy=4.1849 ent_coef=0.001993 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-550280.4 mean_steps=13.4
|
|
[Episode 770] reward=-47941567.0 actor_loss=0.0767 critic_loss=120897801420.8000 entropy=4.1853 ent_coef=0.001993 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 780] reward=-46652798.3 actor_loss=0.1123 critic_loss=121791652864.0000 entropy=4.1886 ent_coef=0.001993 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 780] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-376332.0 mean_steps=14.9
|
|
[Episode 790] reward=-53367131.7 actor_loss=0.0957 critic_loss=123357876736.0000 entropy=4.1891 ent_coef=0.001993 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 800] reward=-53766558.7 actor_loss=0.1315 critic_loss=127672224972.8000 entropy=4.1880 ent_coef=0.001993 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461494.9 mean_steps=14.1
|
|
[Episode 810] reward=-48991340.6 actor_loss=0.1209 critic_loss=119918806882.4615 entropy=4.1830 ent_coef=0.001993 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 820] reward=-48130348.5 actor_loss=0.1198 critic_loss=123183345664.0000 entropy=4.1814 ent_coef=0.001993 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-542653.1 mean_steps=13.6
|
|
[Episode 830] reward=-55733939.4 actor_loss=0.1107 critic_loss=128903516569.6000 entropy=4.1827 ent_coef=0.001993 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 840] reward=-54706765.5 actor_loss=0.1437 critic_loss=128334268888.6154 entropy=4.1885 ent_coef=0.001992 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549089.1 mean_steps=12.8
|
|
[Episode 850] reward=-43908767.5 actor_loss=0.1303 critic_loss=118835955153.4545 entropy=4.1919 ent_coef=0.001992 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 860] reward=-56309026.0 actor_loss=0.1566 critic_loss=129768207911.3846 entropy=4.1853 ent_coef=0.001992 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-393710.6 mean_steps=13.3
|
|
[Episode 870] reward=-49280446.8 actor_loss=0.1195 critic_loss=122961302621.0909 entropy=4.1865 ent_coef=0.001992 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 880] reward=-39288886.3 actor_loss=0.0818 critic_loss=118501072896.0000 entropy=4.1874 ent_coef=0.001992 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529082.0 mean_steps=12.4
|
|
[Episode 890] reward=-51844430.1 actor_loss=0.1083 critic_loss=125903660828.4444 entropy=4.1848 ent_coef=0.001992 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 900] reward=-51956609.7 actor_loss=0.1106 critic_loss=126181428503.2727 entropy=4.1858 ent_coef=0.001992 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424900.4 mean_steps=14.5
|
|
[Episode 910] reward=-45581030.0 actor_loss=0.0862 critic_loss=121186356224.0000 entropy=4.1874 ent_coef=0.001992 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 920] reward=-53360100.7 actor_loss=0.1304 critic_loss=127440100631.2727 entropy=4.1853 ent_coef=0.001992 approx_kl=0.0011 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-520649.9 mean_steps=12.6
|
|
[Episode 930] reward=-46836510.4 actor_loss=0.1048 critic_loss=124415665493.3333 entropy=4.1858 ent_coef=0.001992 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Episode 940] reward=-52740020.2 actor_loss=0.1181 critic_loss=128207795541.3333 entropy=4.1895 ent_coef=0.001992 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555769.5 mean_steps=12.6
|
|
[Episode 950] reward=-53149246.2 actor_loss=0.1041 critic_loss=127140266666.6667 entropy=4.1898 ent_coef=0.001991 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 960] reward=-48368625.6 actor_loss=0.1381 critic_loss=121742521344.0000 entropy=4.1902 ent_coef=0.001991 approx_kl=0.0008 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-376567.9 mean_steps=13.3
|
|
[Episode 970] reward=-61915354.6 actor_loss=0.0879 critic_loss=129661266797.7143 entropy=4.1853 ent_coef=0.001991 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 980] reward=-59690415.5 actor_loss=0.1390 critic_loss=129795188224.0000 entropy=4.1789 ent_coef=0.001991 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498163.5 mean_steps=13.1
|
|
[Episode 990] reward=-53143974.0 actor_loss=0.1228 critic_loss=127847565994.6667 entropy=4.1742 ent_coef=0.001991 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 1000] reward=-50870974.2 actor_loss=0.0853 critic_loss=121653240354.1333 entropy=4.1748 ent_coef=0.001991 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 1000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-449304.2 mean_steps=12.8
|
|
[Episode 1010] reward=-47677615.0 actor_loss=0.1192 critic_loss=123267327772.4444 entropy=4.1724 ent_coef=0.001991 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 1020] reward=-48260646.0 actor_loss=0.1108 critic_loss=121993256960.0000 entropy=4.1722 ent_coef=0.001991 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 1020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476950.7 mean_steps=13.7
|
|
[Episode 1030] reward=-49429636.4 actor_loss=0.1190 critic_loss=124898821006.2222 entropy=4.1702 ent_coef=0.001991 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 1040] reward=-49959398.1 actor_loss=0.1219 critic_loss=122641403531.6364 entropy=4.1701 ent_coef=0.001991 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 1040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494451.7 mean_steps=13.1
|
|
[Episode 1050] reward=-46072996.5 actor_loss=0.1237 critic_loss=122026468352.0000 entropy=4.1666 ent_coef=0.001991 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 1060] reward=-58281450.4 actor_loss=0.1220 critic_loss=128792433225.1429 entropy=4.1628 ent_coef=0.001990 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 1060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435281.6 mean_steps=13.7
|
|
[Episode 1070] reward=-56208711.0 actor_loss=0.1288 critic_loss=126207065367.2727 entropy=4.1644 ent_coef=0.001990 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 1080] reward=-58788411.4 actor_loss=0.1033 critic_loss=132953948160.0000 entropy=4.1700 ent_coef=0.001990 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 1080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-341221.9 mean_steps=14.6
|
|
[Episode 1090] reward=-61552856.8 actor_loss=0.1351 critic_loss=131984992135.5294 entropy=4.1698 ent_coef=0.001990 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 1100] reward=-52763044.0 actor_loss=0.1216 critic_loss=125920220711.3846 entropy=4.1690 ent_coef=0.001990 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 1100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-432680.0 mean_steps=12.7
|
|
[Episode 1110] reward=-47465014.8 actor_loss=0.1143 critic_loss=124667704832.0000 entropy=4.1711 ent_coef=0.001990 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 1120] reward=-53251666.3 actor_loss=0.1276 critic_loss=123603753562.3529 entropy=4.1761 ent_coef=0.001990 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 1120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446388.9 mean_steps=13.6
|
|
[Episode 1130] reward=-57927552.1 actor_loss=0.0773 critic_loss=128763984384.0000 entropy=4.1746 ent_coef=0.001990 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 1140] reward=-44943368.9 actor_loss=0.0861 critic_loss=120831557046.8571 entropy=4.1714 ent_coef=0.001990 approx_kl=-0.0003 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 1140] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-341596.3 mean_steps=15.4
|
|
[Episode 1150] reward=-53752406.6 actor_loss=0.1194 critic_loss=126088423100.6316 entropy=4.1663 ent_coef=0.001990 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 1160] reward=-56931744.8 actor_loss=0.0934 critic_loss=126553595904.0000 entropy=4.1626 ent_coef=0.001990 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 1160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-414230.1 mean_steps=13.5
|
|
[Episode 1170] reward=-51993616.7 actor_loss=0.1159 critic_loss=125562936713.8462 entropy=4.1638 ent_coef=0.001989 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 1180] reward=-56010010.0 actor_loss=0.1340 critic_loss=124519766618.3529 entropy=4.1628 ent_coef=0.001989 approx_kl=0.0008 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 1180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523658.3 mean_steps=13.6
|
|
[Episode 1190] reward=-54171295.7 actor_loss=0.1155 critic_loss=125651947520.0000 entropy=4.1613 ent_coef=0.001989 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 1200] reward=-53544997.5 actor_loss=0.1030 critic_loss=123904206848.0000 entropy=4.1606 ent_coef=0.001989 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 1200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-334396.6 mean_steps=14.8
|
|
[Episode 1210] reward=-43893952.0 actor_loss=0.0854 critic_loss=123039753362.2857 entropy=4.1568 ent_coef=0.001989 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 1220] reward=-40950866.2 actor_loss=0.1030 critic_loss=116060590899.2000 entropy=4.1593 ent_coef=0.001989 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 1220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-402123.3 mean_steps=15.4
|
|
[Episode 1230] reward=-44643362.2 actor_loss=0.1050 critic_loss=116772349724.4444 entropy=4.1559 ent_coef=0.001989 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 1240] reward=-47705279.1 actor_loss=0.1033 critic_loss=120448925696.0000 entropy=4.1583 ent_coef=0.001989 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 1240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494499.7 mean_steps=13.4
|
|
[Episode 1250] reward=-68818762.8 actor_loss=0.1392 critic_loss=133466152960.0000 entropy=4.1569 ent_coef=0.001989 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Episode 1260] reward=-49632864.2 actor_loss=0.0938 critic_loss=126313732973.7143 entropy=4.1589 ent_coef=0.001989 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 1260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525510.7 mean_steps=13.7
|
|
[Episode 1270] reward=-51790274.7 actor_loss=0.1056 critic_loss=122268736512.0000 entropy=4.1590 ent_coef=0.001989 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 1280] reward=-56721334.9 actor_loss=0.1330 critic_loss=126948997120.0000 entropy=4.1574 ent_coef=0.001988 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 1280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451301.0 mean_steps=13.9
|
|
[Episode 1290] reward=-49796747.4 actor_loss=0.1286 critic_loss=125181698048.0000 entropy=4.1594 ent_coef=0.001988 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 1300] reward=-52015436.3 actor_loss=0.0914 critic_loss=125847721511.3846 entropy=4.1556 ent_coef=0.001988 approx_kl=0.0010 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 1300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-337804.9 mean_steps=14.9
|
|
[Episode 1310] reward=-55957724.2 actor_loss=0.1304 critic_loss=123727340780.3077 entropy=4.1581 ent_coef=0.001988 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 1320] reward=-52680881.5 actor_loss=0.1266 critic_loss=118363920676.5714 entropy=4.1599 ent_coef=0.001988 approx_kl=0.0006 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 1320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547708.9 mean_steps=12.8
|
|
[Episode 1330] reward=-53843763.0 actor_loss=0.0868 critic_loss=124864931840.0000 entropy=4.1536 ent_coef=0.001988 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 1340] reward=-61120431.6 actor_loss=0.1383 critic_loss=130029607789.7143 entropy=4.1513 ent_coef=0.001988 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 1340] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-303829.4 mean_steps=17.1
|
|
[Episode 1350] reward=-64441250.0 actor_loss=0.1028 critic_loss=132353005688.4706 entropy=4.1487 ent_coef=0.001988 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 1360] reward=-49434757.9 actor_loss=0.1231 critic_loss=123442198528.0000 entropy=4.1458 ent_coef=0.001988 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 1360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541409.7 mean_steps=12.8
|
|
[Episode 1370] reward=-65069406.3 actor_loss=0.1402 critic_loss=129232198509.7143 entropy=4.1456 ent_coef=0.001988 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 1380] reward=-65228411.2 actor_loss=0.1095 critic_loss=130542546488.8889 entropy=4.1466 ent_coef=0.001988 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 1380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-498815.3 mean_steps=11.4
|
|
[Episode 1390] reward=-69457304.9 actor_loss=0.1416 critic_loss=134746265757.5385 entropy=4.1408 ent_coef=0.001987 approx_kl=0.0035 kl_stop=1 intervention_rate=0.1022 front_blocked=0
|
|
[Episode 1400] reward=-55843813.7 actor_loss=0.1157 critic_loss=125070923933.5385 entropy=4.1368 ent_coef=0.001987 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 1400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-450181.1 mean_steps=12.8
|
|
[Episode 1410] reward=-39090766.4 actor_loss=0.1080 critic_loss=117208355401.1429 entropy=4.1350 ent_coef=0.001987 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 1420] reward=-52415772.3 actor_loss=0.1101 critic_loss=126114296393.1429 entropy=4.1360 ent_coef=0.001987 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 1420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-525859.7 mean_steps=12.0
|
|
[Episode 1430] reward=-49291186.4 actor_loss=0.1155 critic_loss=122417341905.4545 entropy=4.1324 ent_coef=0.001987 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 1440] reward=-60051845.5 actor_loss=0.1366 critic_loss=130173663914.6667 entropy=4.1299 ent_coef=0.001987 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 1440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462099.0 mean_steps=13.1
|
|
[Episode 1450] reward=-44532358.6 actor_loss=0.1368 critic_loss=124143812608.0000 entropy=4.1315 ent_coef=0.001987 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 1460] reward=-49693000.5 actor_loss=0.1247 critic_loss=125502818183.5294 entropy=4.1288 ent_coef=0.001987 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 1460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574788.2 mean_steps=12.9
|
|
[Episode 1470] reward=-61313691.0 actor_loss=0.0816 critic_loss=128814980096.0000 entropy=4.1288 ent_coef=0.001987 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 1480] reward=-51004465.9 actor_loss=0.1502 critic_loss=123033111458.9091 entropy=4.1277 ent_coef=0.001987 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 1480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-415578.7 mean_steps=15.2
|
|
[Episode 1490] reward=-51339476.6 actor_loss=0.1106 critic_loss=120746437120.0000 entropy=4.1256 ent_coef=0.001987 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 1500] reward=-49015461.8 actor_loss=0.1151 critic_loss=122844231972.5714 entropy=4.1240 ent_coef=0.001987 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 1500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523562.1 mean_steps=12.8
|
|
[Episode 1510] reward=-59461164.6 actor_loss=0.1452 critic_loss=127050560853.3333 entropy=4.1264 ent_coef=0.001986 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 1520] reward=-49369162.6 actor_loss=0.1057 critic_loss=122628199219.2000 entropy=4.1251 ent_coef=0.001986 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 1520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-303093.7 mean_steps=15.2
|
|
[Episode 1530] reward=-59182288.1 actor_loss=0.1165 critic_loss=132150705694.1176 entropy=4.1278 ent_coef=0.001986 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 1540] reward=-60541988.0 actor_loss=0.1243 critic_loss=132349403136.0000 entropy=4.1291 ent_coef=0.001986 approx_kl=0.0007 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 1540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603035.6 mean_steps=12.3
|
|
[Episode 1550] reward=-48785433.5 actor_loss=0.0776 critic_loss=121545156900.5714 entropy=4.1292 ent_coef=0.001986 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 1560] reward=-53265918.8 actor_loss=0.1461 critic_loss=123378128896.0000 entropy=4.1304 ent_coef=0.001986 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 1560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-578962.1 mean_steps=13.1
|
|
[Episode 1570] reward=-52072390.6 actor_loss=0.1262 critic_loss=125357488730.3529 entropy=4.1280 ent_coef=0.001986 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 1580] reward=-59649805.6 actor_loss=0.1330 critic_loss=128042774155.6364 entropy=4.1244 ent_coef=0.001986 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 1580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529044.5 mean_steps=12.8
|
|
[Episode 1590] reward=-69774484.6 actor_loss=0.1225 critic_loss=134613527210.6667 entropy=4.1244 ent_coef=0.001986 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0990 front_blocked=0
|
|
[Episode 1600] reward=-52686987.9 actor_loss=0.1130 critic_loss=125500353389.7143 entropy=4.1207 ent_coef=0.001986 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 1600] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-332534.8 mean_steps=15.3
|
|
[Episode 1610] reward=-56886042.3 actor_loss=0.1349 critic_loss=129004913810.2857 entropy=4.1244 ent_coef=0.001986 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 1620] reward=-62411967.3 actor_loss=0.0920 critic_loss=133483705856.0000 entropy=4.1238 ent_coef=0.001985 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 1620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409739.7 mean_steps=15.2
|
|
[Episode 1630] reward=-63385857.0 actor_loss=0.1381 critic_loss=127002467805.8667 entropy=4.1216 ent_coef=0.001985 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 1640] reward=-61245208.0 actor_loss=0.1204 critic_loss=127181111296.0000 entropy=4.1186 ent_coef=0.001985 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 1640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-375384.3 mean_steps=14.1
|
|
[Episode 1650] reward=-52838123.4 actor_loss=0.1298 critic_loss=122427999118.2222 entropy=4.1173 ent_coef=0.001985 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 1660] reward=-44083706.7 actor_loss=0.1191 critic_loss=121130775473.2308 entropy=4.1173 ent_coef=0.001985 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 1660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-600349.2 mean_steps=12.9
|
|
[Episode 1670] reward=-48627420.7 actor_loss=0.1110 critic_loss=121996670293.3333 entropy=4.1147 ent_coef=0.001985 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 1680] reward=-48199420.2 actor_loss=0.1235 critic_loss=124112402139.4286 entropy=4.1122 ent_coef=0.001985 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 1680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481529.1 mean_steps=12.9
|
|
[Episode 1690] reward=-49769636.2 actor_loss=0.1024 critic_loss=124099799040.0000 entropy=4.1117 ent_coef=0.001985 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 1700] reward=-46313843.5 actor_loss=0.1442 critic_loss=121862192128.0000 entropy=4.1054 ent_coef=0.001985 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 1700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-514912.9 mean_steps=12.5
|
|
[Episode 1710] reward=-65205204.5 actor_loss=0.1257 critic_loss=131924271104.0000 entropy=4.1044 ent_coef=0.001985 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 1720] reward=-56985857.0 actor_loss=0.1216 critic_loss=127774845220.5714 entropy=4.1039 ent_coef=0.001985 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 1720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530210.8 mean_steps=12.6
|
|
[Episode 1730] reward=-49994712.0 actor_loss=0.1331 critic_loss=122109238125.7143 entropy=4.1020 ent_coef=0.001984 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 1740] reward=-52198108.4 actor_loss=0.1046 critic_loss=122739267584.0000 entropy=4.0990 ent_coef=0.001984 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 1740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550971.7 mean_steps=12.8
|
|
[Episode 1750] reward=-55956008.5 actor_loss=0.1128 critic_loss=127607811510.8571 entropy=4.0995 ent_coef=0.001984 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 1760] reward=-54454736.2 actor_loss=0.1280 critic_loss=128011403264.0000 entropy=4.1034 ent_coef=0.001984 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 1760] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-623105.9 mean_steps=10.8
|
|
[Episode 1770] reward=-63373024.9 actor_loss=0.1523 critic_loss=130343031808.0000 entropy=4.1056 ent_coef=0.001984 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Episode 1780] reward=-58723310.2 actor_loss=0.1149 critic_loss=130848843776.0000 entropy=4.1036 ent_coef=0.001984 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 1780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557814.5 mean_steps=12.8
|
|
[Episode 1790] reward=-50107095.9 actor_loss=0.1281 critic_loss=125379127768.6154 entropy=4.1030 ent_coef=0.001984 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 1800] reward=-52620787.2 actor_loss=0.1207 critic_loss=128091688960.0000 entropy=4.1003 ent_coef=0.001984 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 1800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470032.4 mean_steps=13.8
|
|
[Episode 1810] reward=-50291266.0 actor_loss=0.1043 critic_loss=123014140723.2000 entropy=4.0958 ent_coef=0.001984 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 1820] reward=-52580249.7 actor_loss=0.1216 critic_loss=125064959863.4667 entropy=4.0948 ent_coef=0.001984 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 1820] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-350302.5 mean_steps=15.4
|
|
[Episode 1830] reward=-52669214.8 actor_loss=0.1145 critic_loss=124500617898.6667 entropy=4.0955 ent_coef=0.001984 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 1840] reward=-51762465.3 actor_loss=0.0980 critic_loss=124772367639.2727 entropy=4.0938 ent_coef=0.001983 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 1840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-439439.1 mean_steps=14.2
|
|
[Episode 1850] reward=-59722464.6 actor_loss=0.1167 critic_loss=133873877811.2000 entropy=4.0884 ent_coef=0.001983 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 1860] reward=-51176514.5 actor_loss=0.0814 critic_loss=128321535089.7778 entropy=4.0928 ent_coef=0.001983 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 1860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-529782.0 mean_steps=11.7
|
|
[Episode 1870] reward=-48545295.4 actor_loss=0.0928 critic_loss=120300817314.9091 entropy=4.0886 ent_coef=0.001983 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 1880] reward=-47383357.8 actor_loss=0.1210 critic_loss=121125639509.3333 entropy=4.0893 ent_coef=0.001983 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 1880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-356324.5 mean_steps=14.6
|
|
[Episode 1890] reward=-52411751.0 actor_loss=0.1283 critic_loss=128231432192.0000 entropy=4.0885 ent_coef=0.001983 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 1900] reward=-73198175.4 actor_loss=0.1258 critic_loss=134873777493.3333 entropy=4.0874 ent_coef=0.001983 approx_kl=0.0024 kl_stop=1 intervention_rate=0.1003 front_blocked=0
|
|
[Eval 1900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-405296.4 mean_steps=14.2
|
|
[Episode 1910] reward=-47082244.0 actor_loss=0.0825 critic_loss=122621119394.9091 entropy=4.0818 ent_coef=0.001983 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 1920] reward=-49329279.0 actor_loss=0.1173 critic_loss=126767891602.2857 entropy=4.0830 ent_coef=0.001983 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 1920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450997.0 mean_steps=14.4
|
|
[Episode 1930] reward=-48565822.1 actor_loss=0.1446 critic_loss=125770293248.0000 entropy=4.0815 ent_coef=0.001983 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 1940] reward=-54673347.0 actor_loss=0.1124 critic_loss=124999203840.0000 entropy=4.0794 ent_coef=0.001983 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 1940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574590.9 mean_steps=12.5
|
|
[Episode 1950] reward=-54993073.3 actor_loss=0.1010 critic_loss=126125159219.2000 entropy=4.0782 ent_coef=0.001982 approx_kl=0.0006 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 1960] reward=-60185450.4 actor_loss=0.1555 critic_loss=127062169600.0000 entropy=4.0780 ent_coef=0.001982 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 1960] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-332549.7 mean_steps=15.2
|
|
[Episode 1970] reward=-44679406.5 actor_loss=0.0939 critic_loss=121299503549.2174 entropy=4.0767 ent_coef=0.001982 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 1980] reward=-44211634.3 actor_loss=0.0889 critic_loss=118148429637.8182 entropy=4.0765 ent_coef=0.001982 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 1980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-419304.3 mean_steps=13.3
|
|
[Episode 1990] reward=-60140477.0 actor_loss=0.0969 critic_loss=126555492352.0000 entropy=4.0735 ent_coef=0.001982 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 2000] reward=-54445089.5 actor_loss=0.1058 critic_loss=128727488512.0000 entropy=4.0727 ent_coef=0.001982 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 2000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549781.5 mean_steps=12.8
|
|
[Episode 2010] reward=-54142735.0 actor_loss=0.1089 critic_loss=126954022229.3333 entropy=4.0728 ent_coef=0.001982 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 2020] reward=-53294698.2 actor_loss=0.1049 critic_loss=125974206327.4667 entropy=4.0707 ent_coef=0.001982 approx_kl=0.0011 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 2020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552614.4 mean_steps=12.7
|
|
[Episode 2030] reward=-62093892.3 actor_loss=0.1485 critic_loss=131182927872.0000 entropy=4.0731 ent_coef=0.001982 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Episode 2040] reward=-52287142.4 actor_loss=0.1167 critic_loss=126350289826.9091 entropy=4.0703 ent_coef=0.001982 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 2040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507730.9 mean_steps=13.2
|
|
[Episode 2050] reward=-49233277.7 actor_loss=0.1156 critic_loss=125271705442.4615 entropy=4.0699 ent_coef=0.001982 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 2060] reward=-55887223.0 actor_loss=0.1422 critic_loss=128107844403.2000 entropy=4.0738 ent_coef=0.001981 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 2060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409075.3 mean_steps=14.8
|
|
[Episode 2070] reward=-65710910.6 actor_loss=0.1246 critic_loss=133782807552.0000 entropy=4.0754 ent_coef=0.001981 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Episode 2080] reward=-61032695.5 actor_loss=0.1073 critic_loss=130958512128.0000 entropy=4.0760 ent_coef=0.001981 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 2080] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-651565.6 mean_steps=10.9
|
|
[Episode 2090] reward=-49726474.8 actor_loss=0.0971 critic_loss=126003790454.1538 entropy=4.0716 ent_coef=0.001981 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 2100] reward=-50638503.9 actor_loss=0.1127 critic_loss=126745647331.5556 entropy=4.0709 ent_coef=0.001981 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 2100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402742.3 mean_steps=14.2
|
|
[Episode 2110] reward=-55935748.2 actor_loss=0.1396 critic_loss=126883635200.0000 entropy=4.0709 ent_coef=0.001981 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 2120] reward=-45660275.9 actor_loss=0.1101 critic_loss=122743617063.3846 entropy=4.0722 ent_coef=0.001981 approx_kl=0.0001 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 2120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519735.9 mean_steps=13.4
|
|
[Episode 2130] reward=-48422532.7 actor_loss=0.1346 critic_loss=124192468406.8571 entropy=4.0696 ent_coef=0.001981 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 2140] reward=-51000114.2 actor_loss=0.0955 critic_loss=123832084322.4615 entropy=4.0681 ent_coef=0.001981 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 2140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-368900.4 mean_steps=14.1
|
|
[Episode 2150] reward=-58041864.2 actor_loss=0.1125 critic_loss=125123690496.0000 entropy=4.0670 ent_coef=0.001981 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 2160] reward=-56514291.4 actor_loss=0.1128 critic_loss=127018604544.0000 entropy=4.0701 ent_coef=0.001981 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 2160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-472572.9 mean_steps=12.2
|
|
[Episode 2170] reward=-43878984.8 actor_loss=0.1224 critic_loss=119477238897.7778 entropy=4.0692 ent_coef=0.001980 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 2180] reward=-61082126.5 actor_loss=0.1225 critic_loss=132297256960.0000 entropy=4.0678 ent_coef=0.001980 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 2180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555149.9 mean_steps=11.8
|
|
[Episode 2190] reward=-55729889.6 actor_loss=0.1230 critic_loss=119404581888.0000 entropy=4.0697 ent_coef=0.001980 approx_kl=0.0006 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 2200] reward=-61560447.8 actor_loss=0.1077 critic_loss=133212755558.4000 entropy=4.0676 ent_coef=0.001980 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 2200] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-359403.5 mean_steps=15.3
|
|
[Episode 2210] reward=-65295769.3 actor_loss=0.1342 critic_loss=132900278272.0000 entropy=4.0619 ent_coef=0.001980 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Episode 2220] reward=-43088229.5 actor_loss=0.0813 critic_loss=120759542784.0000 entropy=4.0601 ent_coef=0.001980 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Eval 2220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-456426.1 mean_steps=12.8
|
|
[Episode 2230] reward=-58843539.9 actor_loss=0.1300 critic_loss=133060039475.2000 entropy=4.0622 ent_coef=0.001980 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 2240] reward=-62638631.2 actor_loss=0.1333 critic_loss=130586166737.4545 entropy=4.0597 ent_coef=0.001980 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 2240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476388.7 mean_steps=13.7
|
|
[Episode 2250] reward=-52686355.0 actor_loss=0.0998 critic_loss=126380361728.0000 entropy=4.0548 ent_coef=0.001980 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 2260] reward=-43566378.6 actor_loss=0.1149 critic_loss=118773381997.7143 entropy=4.0528 ent_coef=0.001980 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 2260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-422102.0 mean_steps=13.4
|
|
[Episode 2270] reward=-54795115.3 actor_loss=0.1246 critic_loss=127322658343.3846 entropy=4.0531 ent_coef=0.001980 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 2280] reward=-50414976.2 actor_loss=0.0921 critic_loss=123753961335.4667 entropy=4.0505 ent_coef=0.001979 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 2280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481236.7 mean_steps=13.1
|
|
[Episode 2290] reward=-51559816.3 actor_loss=0.1360 critic_loss=119673387235.5556 entropy=4.0513 ent_coef=0.001979 approx_kl=0.0007 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 2300] reward=-57324409.0 actor_loss=0.1196 critic_loss=125904997580.8000 entropy=4.0512 ent_coef=0.001979 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 2300] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-622378.5 mean_steps=11.7
|
|
[Episode 2310] reward=-56518793.6 actor_loss=0.1224 critic_loss=126660173824.0000 entropy=4.0516 ent_coef=0.001979 approx_kl=0.0010 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 2320] reward=-53820510.3 actor_loss=0.1110 critic_loss=122521085542.4000 entropy=4.0457 ent_coef=0.001979 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 2320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-540431.0 mean_steps=11.9
|
|
[Episode 2330] reward=-49829354.0 actor_loss=0.1015 critic_loss=119828327537.7778 entropy=4.0464 ent_coef=0.001979 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 2340] reward=-58652924.8 actor_loss=0.0787 critic_loss=131572744192.0000 entropy=4.0478 ent_coef=0.001979 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 2340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-453780.4 mean_steps=12.8
|
|
[Episode 2350] reward=-55311126.7 actor_loss=0.1182 critic_loss=123925835502.9333 entropy=4.0468 ent_coef=0.001979 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 2360] reward=-51141658.2 actor_loss=0.1247 critic_loss=123763013632.0000 entropy=4.0484 ent_coef=0.001979 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 2360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-594342.4 mean_steps=12.2
|
|
[Episode 2370] reward=-56353033.7 actor_loss=0.1057 critic_loss=125378898850.9091 entropy=4.0471 ent_coef=0.001979 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 2380] reward=-50565828.6 actor_loss=0.0879 critic_loss=124535604370.2857 entropy=4.0436 ent_coef=0.001979 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 2380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-377191.0 mean_steps=13.8
|
|
[Episode 2390] reward=-53931643.9 actor_loss=0.1188 critic_loss=125877915963.0769 entropy=4.0431 ent_coef=0.001978 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 2400] reward=-45988564.9 actor_loss=0.0938 critic_loss=122517406659.7647 entropy=4.0429 ent_coef=0.001978 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 2400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540208.7 mean_steps=12.4
|
|
[Episode 2410] reward=-52063911.3 actor_loss=0.1021 critic_loss=121159997667.5556 entropy=4.0416 ent_coef=0.001978 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 2420] reward=-49917223.2 actor_loss=0.1078 critic_loss=124858667463.1111 entropy=4.0429 ent_coef=0.001978 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 2420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-552470.9 mean_steps=12.0
|
|
[Episode 2430] reward=-54173825.2 actor_loss=0.0815 critic_loss=126419917482.6667 entropy=4.0415 ent_coef=0.001978 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 2440] reward=-49243107.1 actor_loss=0.1163 critic_loss=122187856430.5455 entropy=4.0431 ent_coef=0.001978 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 2440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469371.4 mean_steps=13.1
|
|
[Episode 2450] reward=-50567540.5 actor_loss=0.1151 critic_loss=120095110875.4286 entropy=4.0403 ent_coef=0.001978 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 2460] reward=-62124179.9 actor_loss=0.1174 critic_loss=131110531657.1429 entropy=4.0394 ent_coef=0.001978 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 2460] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-623421.1 mean_steps=11.6
|
|
[Episode 2470] reward=-39304688.8 actor_loss=0.0965 critic_loss=115660618865.7778 entropy=4.0385 ent_coef=0.001978 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 2480] reward=-58989419.9 actor_loss=0.0773 critic_loss=126975424325.8182 entropy=4.0406 ent_coef=0.001978 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 2480] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-663475.7 mean_steps=12.0
|
|
[Episode 2490] reward=-40447094.1 actor_loss=0.1187 critic_loss=116279048005.8182 entropy=4.0398 ent_coef=0.001978 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 2500] reward=-41532839.0 actor_loss=0.1232 critic_loss=116452249972.3636 entropy=4.0365 ent_coef=0.001978 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 2500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-595073.8 mean_steps=11.6
|
|
[Episode 2510] reward=-50294786.3 actor_loss=0.1272 critic_loss=121396253491.2000 entropy=4.0312 ent_coef=0.001977 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|