2907 lines
436 KiB
Plaintext
2907 lines
436 KiB
Plaintext
nohup: ignoring input
|
|
[Episode 10] reward=-56496290.6 actor_loss=0.3227 critic_loss=121984741831.1111 entropy=4.2577 ent_coef=0.002000 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0977 front_blocked=0
|
|
[Episode 20] reward=-60611149.8 actor_loss=0.2760 critic_loss=128111700480.0000 entropy=4.2567 ent_coef=0.002000 approx_kl=0.0022 kl_stop=1 intervention_rate=0.1061 front_blocked=0
|
|
[Eval 20] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-647820.3 mean_steps=11.0
|
|
[Episode 30] reward=-63045787.2 actor_loss=0.1985 critic_loss=133642527350.1538 entropy=4.2622 ent_coef=0.002000 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Episode 40] reward=-53271802.8 actor_loss=0.1966 critic_loss=122983175850.6667 entropy=4.2682 ent_coef=0.002000 approx_kl=0.0003 kl_stop=0 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 40] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-400703.7 mean_steps=14.8
|
|
[Episode 50] reward=-40607038.8 actor_loss=0.2024 critic_loss=114208212204.3077 entropy=4.2755 ent_coef=0.002000 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 60] reward=-66300808.7 actor_loss=0.1677 critic_loss=130490473532.2353 entropy=4.2746 ent_coef=0.001999 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0957 front_blocked=0
|
|
[Eval 60] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544995.7 mean_steps=13.0
|
|
[Episode 70] reward=-54721000.1 actor_loss=0.1882 critic_loss=124476187079.1111 entropy=4.2771 ent_coef=0.001999 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Episode 80] reward=-58580284.5 actor_loss=0.1303 critic_loss=127428338036.3636 entropy=4.2760 ent_coef=0.001999 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 80] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-477856.8 mean_steps=14.4
|
|
[Episode 90] reward=-53411034.7 actor_loss=0.1630 critic_loss=123062245533.5385 entropy=4.2738 ent_coef=0.001999 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 100] reward=-57669159.9 actor_loss=0.1305 critic_loss=127343330840.3810 entropy=4.2730 ent_coef=0.001999 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-486597.1 mean_steps=14.5
|
|
[Episode 110] reward=-58670049.1 actor_loss=0.1631 critic_loss=127637036400.6400 entropy=4.2753 ent_coef=0.001999 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 120] reward=-58657999.4 actor_loss=0.1676 critic_loss=126264424903.1111 entropy=4.2769 ent_coef=0.001999 approx_kl=0.0004 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-531457.4 mean_steps=11.8
|
|
[Episode 130] reward=-54011232.6 actor_loss=0.2306 critic_loss=129836245772.1905 entropy=4.2733 ent_coef=0.001999 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 140] reward=-53796659.4 actor_loss=0.1312 critic_loss=121578362321.4545 entropy=4.2669 ent_coef=0.001999 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-630873.5 mean_steps=12.5
|
|
[Episode 150] reward=-56071938.5 actor_loss=0.1749 critic_loss=122299425751.0400 entropy=4.2607 ent_coef=0.001999 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 160] reward=-39575614.4 actor_loss=0.1461 critic_loss=114264811812.5714 entropy=4.2615 ent_coef=0.001999 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-457549.2 mean_steps=13.0
|
|
[Episode 170] reward=-49068837.5 actor_loss=0.1382 critic_loss=122159541816.8889 entropy=4.2583 ent_coef=0.001998 approx_kl=0.0006 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 180] reward=-51909510.4 actor_loss=0.1713 critic_loss=121776541455.0588 entropy=4.2599 ent_coef=0.001998 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-452093.1 mean_steps=12.8
|
|
[Episode 190] reward=-65770859.0 actor_loss=0.1666 critic_loss=133417641984.0000 entropy=4.2542 ent_coef=0.001998 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Episode 200] reward=-52114437.3 actor_loss=0.1067 critic_loss=126194403800.6154 entropy=4.2550 ent_coef=0.001998 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523031.4 mean_steps=12.7
|
|
[Episode 210] reward=-67170668.6 actor_loss=0.1605 critic_loss=137164337643.5200 entropy=4.2506 ent_coef=0.001998 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Episode 220] reward=-57076261.2 actor_loss=0.1871 critic_loss=125697804800.0000 entropy=4.2463 ent_coef=0.001998 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Eval 220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-520051.1 mean_steps=12.8
|
|
[Episode 230] reward=-47170080.9 actor_loss=0.1417 critic_loss=122106056919.5789 entropy=4.2453 ent_coef=0.001998 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 240] reward=-43889928.1 actor_loss=0.1452 critic_loss=119895527796.3636 entropy=4.2435 ent_coef=0.001998 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-359460.1 mean_steps=14.9
|
|
[Episode 250] reward=-56131162.1 actor_loss=0.1311 critic_loss=127673012224.0000 entropy=4.2424 ent_coef=0.001998 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 260] reward=-53532034.8 actor_loss=0.1215 critic_loss=124412613632.0000 entropy=4.2370 ent_coef=0.001998 approx_kl=0.0008 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-474400.3 mean_steps=13.3
|
|
[Episode 270] reward=-48826672.0 actor_loss=0.1389 critic_loss=119337487473.7778 entropy=4.2359 ent_coef=0.001998 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 280] reward=-54100988.9 actor_loss=0.1315 critic_loss=127345608192.0000 entropy=4.2347 ent_coef=0.001997 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-382531.3 mean_steps=13.9
|
|
[Episode 290] reward=-49264785.9 actor_loss=0.0979 critic_loss=122663326720.0000 entropy=4.2324 ent_coef=0.001997 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 300] reward=-54752728.7 actor_loss=0.1382 critic_loss=126694122496.0000 entropy=4.2364 ent_coef=0.001997 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399911.3 mean_steps=14.1
|
|
[Episode 310] reward=-50761792.3 actor_loss=0.0935 critic_loss=121983381682.0870 entropy=4.2377 ent_coef=0.001997 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 320] reward=-53445862.1 actor_loss=0.1365 critic_loss=128295692533.7600 entropy=4.2352 ent_coef=0.001997 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500973.0 mean_steps=13.2
|
|
[Episode 330] reward=-41513419.5 actor_loss=0.1137 critic_loss=119585714858.6667 entropy=4.2309 ent_coef=0.001997 approx_kl=-0.0002 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 340] reward=-65895625.7 actor_loss=0.1175 critic_loss=133780068352.0000 entropy=4.2281 ent_coef=0.001997 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-366366.4 mean_steps=15.2
|
|
[Episode 350] reward=-45519469.2 actor_loss=0.0977 critic_loss=119207410967.2727 entropy=4.2217 ent_coef=0.001997 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Episode 360] reward=-64910313.7 actor_loss=0.1209 critic_loss=134707517952.0000 entropy=4.2215 ent_coef=0.001997 approx_kl=0.0001 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-580392.3 mean_steps=11.3
|
|
[Episode 370] reward=-53801891.0 actor_loss=0.1473 critic_loss=127576801280.0000 entropy=4.2188 ent_coef=0.001997 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 380] reward=-50334279.5 actor_loss=0.1223 critic_loss=127097515008.0000 entropy=4.2114 ent_coef=0.001997 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-378348.7 mean_steps=13.3
|
|
[Episode 390] reward=-45637583.1 actor_loss=0.1578 critic_loss=122987661191.5294 entropy=4.2050 ent_coef=0.001996 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 400] reward=-47589863.2 actor_loss=0.1152 critic_loss=124237224891.7333 entropy=4.2063 ent_coef=0.001996 approx_kl=0.0011 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 400] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-307878.5 mean_steps=15.2
|
|
[Episode 410] reward=-47522170.2 actor_loss=0.1285 critic_loss=120630910098.2857 entropy=4.2012 ent_coef=0.001996 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 420] reward=-56660727.8 actor_loss=0.1347 critic_loss=129353600133.5652 entropy=4.1991 ent_coef=0.001996 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543654.3 mean_steps=12.3
|
|
[Episode 430] reward=-49220509.2 actor_loss=0.1268 critic_loss=126829160740.5714 entropy=4.1986 ent_coef=0.001996 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 440] reward=-39745843.4 actor_loss=0.1162 critic_loss=112348769581.1765 entropy=4.1991 ent_coef=0.001996 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507364.4 mean_steps=13.3
|
|
[Episode 450] reward=-42625843.5 actor_loss=0.1240 critic_loss=117098818560.0000 entropy=4.2000 ent_coef=0.001996 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 460] reward=-46913927.1 actor_loss=0.1187 critic_loss=122213515673.6000 entropy=4.1933 ent_coef=0.001996 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531365.2 mean_steps=13.4
|
|
[Episode 470] reward=-48195139.3 actor_loss=0.1388 critic_loss=123647064291.5556 entropy=4.1904 ent_coef=0.001996 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 480] reward=-55448488.8 actor_loss=0.1157 critic_loss=128298313216.0000 entropy=4.1902 ent_coef=0.001996 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-377425.3 mean_steps=14.2
|
|
[Episode 490] reward=-41778985.0 actor_loss=0.1335 critic_loss=118513517025.8824 entropy=4.1908 ent_coef=0.001996 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 500] reward=-48928781.6 actor_loss=0.1302 critic_loss=123309213882.1818 entropy=4.1891 ent_coef=0.001996 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 500] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-391654.5 mean_steps=15.1
|
|
[Episode 510] reward=-43458068.4 actor_loss=0.1581 critic_loss=121486024192.0000 entropy=4.1909 ent_coef=0.001995 approx_kl=0.0004 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 520] reward=-52251644.3 actor_loss=0.1273 critic_loss=127484853248.0000 entropy=4.1840 ent_coef=0.001995 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 520] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-274979.1 mean_steps=15.7
|
|
[Episode 530] reward=-58842297.9 actor_loss=0.1169 critic_loss=125155077324.8000 entropy=4.1807 ent_coef=0.001995 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 540] reward=-39927144.7 actor_loss=0.0876 critic_loss=114764991566.7692 entropy=4.1810 ent_coef=0.001995 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-463039.5 mean_steps=13.2
|
|
[Episode 550] reward=-51087729.9 actor_loss=0.1440 critic_loss=121499171840.0000 entropy=4.1828 ent_coef=0.001995 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 560] reward=-65464642.7 actor_loss=0.1427 critic_loss=130300248610.1333 entropy=4.1805 ent_coef=0.001995 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 560] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-317893.4 mean_steps=16.1
|
|
[Episode 570] reward=-49133908.1 actor_loss=0.1132 critic_loss=125379599213.7143 entropy=4.1813 ent_coef=0.001995 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 580] reward=-40342749.1 actor_loss=0.1368 critic_loss=117497769984.0000 entropy=4.1787 ent_coef=0.001995 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-372622.1 mean_steps=13.8
|
|
[Episode 590] reward=-44890760.1 actor_loss=0.1239 critic_loss=118587565056.0000 entropy=4.1790 ent_coef=0.001995 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 600] reward=-39771247.2 actor_loss=0.1222 critic_loss=118625696808.9600 entropy=4.1729 ent_coef=0.001995 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461029.0 mean_steps=14.1
|
|
[Episode 610] reward=-52821338.0 actor_loss=0.1201 critic_loss=126508147598.2222 entropy=4.1747 ent_coef=0.001995 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 620] reward=-52751099.6 actor_loss=0.1563 critic_loss=123214995456.0000 entropy=4.1809 ent_coef=0.001994 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-531685.5 mean_steps=12.9
|
|
[Episode 630] reward=-46774686.9 actor_loss=0.1100 critic_loss=120962139282.2857 entropy=4.1783 ent_coef=0.001994 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 640] reward=-39861458.7 actor_loss=0.1133 critic_loss=117034416810.6667 entropy=4.1753 ent_coef=0.001994 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 640] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-257016.6 mean_steps=16.4
|
|
[Episode 650] reward=-60172696.0 actor_loss=0.1197 critic_loss=130656425013.8947 entropy=4.1817 ent_coef=0.001994 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 660] reward=-51502239.9 actor_loss=0.1092 critic_loss=124843710756.5714 entropy=4.1842 ent_coef=0.001994 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430705.5 mean_steps=14.3
|
|
[Episode 670] reward=-50081370.1 actor_loss=0.1011 critic_loss=123590824755.2000 entropy=4.1866 ent_coef=0.001994 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 680] reward=-35144944.7 actor_loss=0.1539 critic_loss=115952061878.8571 entropy=4.1859 ent_coef=0.001994 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Eval 680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469951.5 mean_steps=13.8
|
|
[Episode 690] reward=-49470494.4 actor_loss=0.1061 critic_loss=121279436390.4000 entropy=4.1919 ent_coef=0.001994 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 700] reward=-49803531.2 actor_loss=0.1126 critic_loss=124686848819.2000 entropy=4.1900 ent_coef=0.001994 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496324.6 mean_steps=13.2
|
|
[Episode 710] reward=-57375754.0 actor_loss=0.1030 critic_loss=127451485525.3333 entropy=4.1884 ent_coef=0.001994 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 720] reward=-48658534.4 actor_loss=0.1113 critic_loss=120637472023.2727 entropy=4.1912 ent_coef=0.001994 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441811.7 mean_steps=14.6
|
|
[Episode 730] reward=-53449604.6 actor_loss=0.1281 critic_loss=121659839634.2857 entropy=4.1885 ent_coef=0.001993 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 740] reward=-46102688.2 actor_loss=0.1083 critic_loss=123121505621.3333 entropy=4.1850 ent_coef=0.001993 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-413772.3 mean_steps=13.6
|
|
[Episode 750] reward=-49454459.5 actor_loss=0.1071 critic_loss=123369788757.3333 entropy=4.1836 ent_coef=0.001993 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 760] reward=-59269030.5 actor_loss=0.1558 critic_loss=129341459114.6667 entropy=4.1849 ent_coef=0.001993 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-550280.4 mean_steps=13.4
|
|
[Episode 770] reward=-47941567.0 actor_loss=0.0767 critic_loss=120897801420.8000 entropy=4.1853 ent_coef=0.001993 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 780] reward=-46652798.3 actor_loss=0.1123 critic_loss=121791652864.0000 entropy=4.1886 ent_coef=0.001993 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 780] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-376332.0 mean_steps=14.9
|
|
[Episode 790] reward=-53367131.7 actor_loss=0.0957 critic_loss=123357876736.0000 entropy=4.1891 ent_coef=0.001993 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 800] reward=-53766558.7 actor_loss=0.1315 critic_loss=127672224972.8000 entropy=4.1880 ent_coef=0.001993 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461494.9 mean_steps=14.1
|
|
[Episode 810] reward=-48991340.6 actor_loss=0.1209 critic_loss=119918806882.4615 entropy=4.1830 ent_coef=0.001993 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 820] reward=-48130348.5 actor_loss=0.1198 critic_loss=123183345664.0000 entropy=4.1814 ent_coef=0.001993 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-542653.1 mean_steps=13.6
|
|
[Episode 830] reward=-55733939.4 actor_loss=0.1107 critic_loss=128903516569.6000 entropy=4.1827 ent_coef=0.001993 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 840] reward=-54706765.5 actor_loss=0.1437 critic_loss=128334268888.6154 entropy=4.1885 ent_coef=0.001992 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549089.1 mean_steps=12.8
|
|
[Episode 850] reward=-43908767.5 actor_loss=0.1303 critic_loss=118835955153.4545 entropy=4.1919 ent_coef=0.001992 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 860] reward=-56309026.0 actor_loss=0.1566 critic_loss=129768207911.3846 entropy=4.1853 ent_coef=0.001992 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-393710.6 mean_steps=13.3
|
|
[Episode 870] reward=-49280446.8 actor_loss=0.1195 critic_loss=122961302621.0909 entropy=4.1865 ent_coef=0.001992 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 880] reward=-39288886.3 actor_loss=0.0818 critic_loss=118501072896.0000 entropy=4.1874 ent_coef=0.001992 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529082.0 mean_steps=12.4
|
|
[Episode 890] reward=-51844430.1 actor_loss=0.1083 critic_loss=125903660828.4444 entropy=4.1848 ent_coef=0.001992 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 900] reward=-51956609.7 actor_loss=0.1106 critic_loss=126181428503.2727 entropy=4.1858 ent_coef=0.001992 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424900.4 mean_steps=14.5
|
|
[Episode 910] reward=-45581030.0 actor_loss=0.0862 critic_loss=121186356224.0000 entropy=4.1874 ent_coef=0.001992 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 920] reward=-53360100.7 actor_loss=0.1304 critic_loss=127440100631.2727 entropy=4.1853 ent_coef=0.001992 approx_kl=0.0011 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-520649.9 mean_steps=12.6
|
|
[Episode 930] reward=-46836510.4 actor_loss=0.1048 critic_loss=124415665493.3333 entropy=4.1858 ent_coef=0.001992 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Episode 940] reward=-52740020.2 actor_loss=0.1181 critic_loss=128207795541.3333 entropy=4.1895 ent_coef=0.001992 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-555769.5 mean_steps=12.6
|
|
[Episode 950] reward=-53149246.2 actor_loss=0.1041 critic_loss=127140266666.6667 entropy=4.1898 ent_coef=0.001991 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 960] reward=-48368625.6 actor_loss=0.1381 critic_loss=121742521344.0000 entropy=4.1902 ent_coef=0.001991 approx_kl=0.0008 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-376567.9 mean_steps=13.3
|
|
[Episode 970] reward=-61915354.6 actor_loss=0.0879 critic_loss=129661266797.7143 entropy=4.1853 ent_coef=0.001991 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 980] reward=-59690415.5 actor_loss=0.1390 critic_loss=129795188224.0000 entropy=4.1789 ent_coef=0.001991 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498163.5 mean_steps=13.1
|
|
[Episode 990] reward=-53143974.0 actor_loss=0.1228 critic_loss=127847565994.6667 entropy=4.1742 ent_coef=0.001991 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 1000] reward=-50870974.2 actor_loss=0.0853 critic_loss=121653240354.1333 entropy=4.1748 ent_coef=0.001991 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 1000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-449304.2 mean_steps=12.8
|
|
[Episode 1010] reward=-47677615.0 actor_loss=0.1192 critic_loss=123267327772.4444 entropy=4.1724 ent_coef=0.001991 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 1020] reward=-48260646.0 actor_loss=0.1108 critic_loss=121993256960.0000 entropy=4.1722 ent_coef=0.001991 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 1020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476950.7 mean_steps=13.7
|
|
[Episode 1030] reward=-49429636.4 actor_loss=0.1190 critic_loss=124898821006.2222 entropy=4.1702 ent_coef=0.001991 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 1040] reward=-49959398.1 actor_loss=0.1219 critic_loss=122641403531.6364 entropy=4.1701 ent_coef=0.001991 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 1040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494451.7 mean_steps=13.1
|
|
[Episode 1050] reward=-46072996.5 actor_loss=0.1237 critic_loss=122026468352.0000 entropy=4.1666 ent_coef=0.001991 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 1060] reward=-58281450.4 actor_loss=0.1220 critic_loss=128792433225.1429 entropy=4.1628 ent_coef=0.001990 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 1060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435281.6 mean_steps=13.7
|
|
[Episode 1070] reward=-56208711.0 actor_loss=0.1288 critic_loss=126207065367.2727 entropy=4.1644 ent_coef=0.001990 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 1080] reward=-58788411.4 actor_loss=0.1033 critic_loss=132953948160.0000 entropy=4.1700 ent_coef=0.001990 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 1080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-341221.9 mean_steps=14.6
|
|
[Episode 1090] reward=-61552856.8 actor_loss=0.1351 critic_loss=131984992135.5294 entropy=4.1698 ent_coef=0.001990 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 1100] reward=-52763044.0 actor_loss=0.1216 critic_loss=125920220711.3846 entropy=4.1690 ent_coef=0.001990 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 1100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-432680.0 mean_steps=12.7
|
|
[Episode 1110] reward=-47465014.8 actor_loss=0.1143 critic_loss=124667704832.0000 entropy=4.1711 ent_coef=0.001990 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 1120] reward=-53251666.3 actor_loss=0.1276 critic_loss=123603753562.3529 entropy=4.1761 ent_coef=0.001990 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 1120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446388.9 mean_steps=13.6
|
|
[Episode 1130] reward=-57927552.1 actor_loss=0.0773 critic_loss=128763984384.0000 entropy=4.1746 ent_coef=0.001990 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 1140] reward=-44943368.9 actor_loss=0.0861 critic_loss=120831557046.8571 entropy=4.1714 ent_coef=0.001990 approx_kl=-0.0003 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 1140] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-341596.3 mean_steps=15.4
|
|
[Episode 1150] reward=-53752406.6 actor_loss=0.1194 critic_loss=126088423100.6316 entropy=4.1663 ent_coef=0.001990 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 1160] reward=-56931744.8 actor_loss=0.0934 critic_loss=126553595904.0000 entropy=4.1626 ent_coef=0.001990 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 1160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-414230.1 mean_steps=13.5
|
|
[Episode 1170] reward=-51993616.7 actor_loss=0.1159 critic_loss=125562936713.8462 entropy=4.1638 ent_coef=0.001989 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 1180] reward=-56010010.0 actor_loss=0.1340 critic_loss=124519766618.3529 entropy=4.1628 ent_coef=0.001989 approx_kl=0.0008 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 1180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523658.3 mean_steps=13.6
|
|
[Episode 1190] reward=-54171295.7 actor_loss=0.1155 critic_loss=125651947520.0000 entropy=4.1613 ent_coef=0.001989 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 1200] reward=-53544997.5 actor_loss=0.1030 critic_loss=123904206848.0000 entropy=4.1606 ent_coef=0.001989 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 1200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-334396.6 mean_steps=14.8
|
|
[Episode 1210] reward=-43893952.0 actor_loss=0.0854 critic_loss=123039753362.2857 entropy=4.1568 ent_coef=0.001989 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 1220] reward=-40950866.2 actor_loss=0.1030 critic_loss=116060590899.2000 entropy=4.1593 ent_coef=0.001989 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 1220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-402123.3 mean_steps=15.4
|
|
[Episode 1230] reward=-44643362.2 actor_loss=0.1050 critic_loss=116772349724.4444 entropy=4.1559 ent_coef=0.001989 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 1240] reward=-47705279.1 actor_loss=0.1033 critic_loss=120448925696.0000 entropy=4.1583 ent_coef=0.001989 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 1240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494499.7 mean_steps=13.4
|
|
[Episode 1250] reward=-68818762.8 actor_loss=0.1392 critic_loss=133466152960.0000 entropy=4.1569 ent_coef=0.001989 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Episode 1260] reward=-49632864.2 actor_loss=0.0938 critic_loss=126313732973.7143 entropy=4.1589 ent_coef=0.001989 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 1260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525510.7 mean_steps=13.7
|
|
[Episode 1270] reward=-51790274.7 actor_loss=0.1056 critic_loss=122268736512.0000 entropy=4.1590 ent_coef=0.001989 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 1280] reward=-56721334.9 actor_loss=0.1330 critic_loss=126948997120.0000 entropy=4.1574 ent_coef=0.001988 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 1280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451301.0 mean_steps=13.9
|
|
[Episode 1290] reward=-49796747.4 actor_loss=0.1286 critic_loss=125181698048.0000 entropy=4.1594 ent_coef=0.001988 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 1300] reward=-52015436.3 actor_loss=0.0914 critic_loss=125847721511.3846 entropy=4.1556 ent_coef=0.001988 approx_kl=0.0010 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 1300] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-337804.9 mean_steps=14.9
|
|
[Episode 1310] reward=-55957724.2 actor_loss=0.1304 critic_loss=123727340780.3077 entropy=4.1581 ent_coef=0.001988 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 1320] reward=-52680881.5 actor_loss=0.1266 critic_loss=118363920676.5714 entropy=4.1599 ent_coef=0.001988 approx_kl=0.0006 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 1320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-547708.9 mean_steps=12.8
|
|
[Episode 1330] reward=-53843763.0 actor_loss=0.0868 critic_loss=124864931840.0000 entropy=4.1536 ent_coef=0.001988 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 1340] reward=-61120431.6 actor_loss=0.1383 critic_loss=130029607789.7143 entropy=4.1513 ent_coef=0.001988 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 1340] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-303829.4 mean_steps=17.1
|
|
[Episode 1350] reward=-64441250.0 actor_loss=0.1028 critic_loss=132353005688.4706 entropy=4.1487 ent_coef=0.001988 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 1360] reward=-49434757.9 actor_loss=0.1231 critic_loss=123442198528.0000 entropy=4.1458 ent_coef=0.001988 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 1360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-541409.7 mean_steps=12.8
|
|
[Episode 1370] reward=-65069406.3 actor_loss=0.1402 critic_loss=129232198509.7143 entropy=4.1456 ent_coef=0.001988 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 1380] reward=-65228411.2 actor_loss=0.1095 critic_loss=130542546488.8889 entropy=4.1466 ent_coef=0.001988 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 1380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-498815.3 mean_steps=11.4
|
|
[Episode 1390] reward=-69457304.9 actor_loss=0.1416 critic_loss=134746265757.5385 entropy=4.1408 ent_coef=0.001987 approx_kl=0.0035 kl_stop=1 intervention_rate=0.1022 front_blocked=0
|
|
[Episode 1400] reward=-55843813.7 actor_loss=0.1157 critic_loss=125070923933.5385 entropy=4.1368 ent_coef=0.001987 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 1400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-450181.1 mean_steps=12.8
|
|
[Episode 1410] reward=-39090766.4 actor_loss=0.1080 critic_loss=117208355401.1429 entropy=4.1350 ent_coef=0.001987 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 1420] reward=-52415772.3 actor_loss=0.1101 critic_loss=126114296393.1429 entropy=4.1360 ent_coef=0.001987 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 1420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-525859.7 mean_steps=12.0
|
|
[Episode 1430] reward=-49291186.4 actor_loss=0.1155 critic_loss=122417341905.4545 entropy=4.1324 ent_coef=0.001987 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 1440] reward=-60051845.5 actor_loss=0.1366 critic_loss=130173663914.6667 entropy=4.1299 ent_coef=0.001987 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 1440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462099.0 mean_steps=13.1
|
|
[Episode 1450] reward=-44532358.6 actor_loss=0.1368 critic_loss=124143812608.0000 entropy=4.1315 ent_coef=0.001987 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 1460] reward=-49693000.5 actor_loss=0.1247 critic_loss=125502818183.5294 entropy=4.1288 ent_coef=0.001987 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 1460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574788.2 mean_steps=12.9
|
|
[Episode 1470] reward=-61313691.0 actor_loss=0.0816 critic_loss=128814980096.0000 entropy=4.1288 ent_coef=0.001987 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 1480] reward=-51004465.9 actor_loss=0.1502 critic_loss=123033111458.9091 entropy=4.1277 ent_coef=0.001987 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 1480] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-415578.7 mean_steps=15.2
|
|
[Episode 1490] reward=-51339476.6 actor_loss=0.1106 critic_loss=120746437120.0000 entropy=4.1256 ent_coef=0.001987 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 1500] reward=-49015461.8 actor_loss=0.1151 critic_loss=122844231972.5714 entropy=4.1240 ent_coef=0.001987 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 1500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523562.1 mean_steps=12.8
|
|
[Episode 1510] reward=-59461164.6 actor_loss=0.1452 critic_loss=127050560853.3333 entropy=4.1264 ent_coef=0.001986 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 1520] reward=-49369162.6 actor_loss=0.1057 critic_loss=122628199219.2000 entropy=4.1251 ent_coef=0.001986 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 1520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-303093.7 mean_steps=15.2
|
|
[Episode 1530] reward=-59182288.1 actor_loss=0.1165 critic_loss=132150705694.1176 entropy=4.1278 ent_coef=0.001986 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 1540] reward=-60541988.0 actor_loss=0.1243 critic_loss=132349403136.0000 entropy=4.1291 ent_coef=0.001986 approx_kl=0.0007 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 1540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603035.6 mean_steps=12.3
|
|
[Episode 1550] reward=-48785433.5 actor_loss=0.0776 critic_loss=121545156900.5714 entropy=4.1292 ent_coef=0.001986 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 1560] reward=-53265918.8 actor_loss=0.1461 critic_loss=123378128896.0000 entropy=4.1304 ent_coef=0.001986 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 1560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-578962.1 mean_steps=13.1
|
|
[Episode 1570] reward=-52072390.6 actor_loss=0.1262 critic_loss=125357488730.3529 entropy=4.1280 ent_coef=0.001986 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 1580] reward=-59649805.6 actor_loss=0.1330 critic_loss=128042774155.6364 entropy=4.1244 ent_coef=0.001986 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 1580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529044.5 mean_steps=12.8
|
|
[Episode 1590] reward=-69774484.6 actor_loss=0.1225 critic_loss=134613527210.6667 entropy=4.1244 ent_coef=0.001986 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0990 front_blocked=0
|
|
[Episode 1600] reward=-52686987.9 actor_loss=0.1130 critic_loss=125500353389.7143 entropy=4.1207 ent_coef=0.001986 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 1600] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-332534.8 mean_steps=15.3
|
|
[Episode 1610] reward=-56886042.3 actor_loss=0.1349 critic_loss=129004913810.2857 entropy=4.1244 ent_coef=0.001986 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 1620] reward=-62411967.3 actor_loss=0.0920 critic_loss=133483705856.0000 entropy=4.1238 ent_coef=0.001985 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 1620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409739.7 mean_steps=15.2
|
|
[Episode 1630] reward=-63385857.0 actor_loss=0.1381 critic_loss=127002467805.8667 entropy=4.1216 ent_coef=0.001985 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 1640] reward=-61245208.0 actor_loss=0.1204 critic_loss=127181111296.0000 entropy=4.1186 ent_coef=0.001985 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 1640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-375384.3 mean_steps=14.1
|
|
[Episode 1650] reward=-52838123.4 actor_loss=0.1298 critic_loss=122427999118.2222 entropy=4.1173 ent_coef=0.001985 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 1660] reward=-44083706.7 actor_loss=0.1191 critic_loss=121130775473.2308 entropy=4.1173 ent_coef=0.001985 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 1660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-600349.2 mean_steps=12.9
|
|
[Episode 1670] reward=-48627420.7 actor_loss=0.1110 critic_loss=121996670293.3333 entropy=4.1147 ent_coef=0.001985 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 1680] reward=-48199420.2 actor_loss=0.1235 critic_loss=124112402139.4286 entropy=4.1122 ent_coef=0.001985 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 1680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481529.1 mean_steps=12.9
|
|
[Episode 1690] reward=-49769636.2 actor_loss=0.1024 critic_loss=124099799040.0000 entropy=4.1117 ent_coef=0.001985 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 1700] reward=-46313843.5 actor_loss=0.1442 critic_loss=121862192128.0000 entropy=4.1054 ent_coef=0.001985 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 1700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-514912.9 mean_steps=12.5
|
|
[Episode 1710] reward=-65205204.5 actor_loss=0.1257 critic_loss=131924271104.0000 entropy=4.1044 ent_coef=0.001985 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 1720] reward=-56985857.0 actor_loss=0.1216 critic_loss=127774845220.5714 entropy=4.1039 ent_coef=0.001985 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 1720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530210.8 mean_steps=12.6
|
|
[Episode 1730] reward=-49994712.0 actor_loss=0.1331 critic_loss=122109238125.7143 entropy=4.1020 ent_coef=0.001984 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 1740] reward=-52198108.4 actor_loss=0.1046 critic_loss=122739267584.0000 entropy=4.0990 ent_coef=0.001984 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 1740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550971.7 mean_steps=12.8
|
|
[Episode 1750] reward=-55956008.5 actor_loss=0.1128 critic_loss=127607811510.8571 entropy=4.0995 ent_coef=0.001984 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 1760] reward=-54454736.2 actor_loss=0.1280 critic_loss=128011403264.0000 entropy=4.1034 ent_coef=0.001984 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 1760] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-623105.9 mean_steps=10.8
|
|
[Episode 1770] reward=-63373024.9 actor_loss=0.1523 critic_loss=130343031808.0000 entropy=4.1056 ent_coef=0.001984 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Episode 1780] reward=-58723310.2 actor_loss=0.1149 critic_loss=130848843776.0000 entropy=4.1036 ent_coef=0.001984 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 1780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557814.5 mean_steps=12.8
|
|
[Episode 1790] reward=-50107095.9 actor_loss=0.1281 critic_loss=125379127768.6154 entropy=4.1030 ent_coef=0.001984 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 1800] reward=-52620787.2 actor_loss=0.1207 critic_loss=128091688960.0000 entropy=4.1003 ent_coef=0.001984 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 1800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470032.4 mean_steps=13.8
|
|
[Episode 1810] reward=-50291266.0 actor_loss=0.1043 critic_loss=123014140723.2000 entropy=4.0958 ent_coef=0.001984 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 1820] reward=-52580249.7 actor_loss=0.1216 critic_loss=125064959863.4667 entropy=4.0948 ent_coef=0.001984 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 1820] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-350302.5 mean_steps=15.4
|
|
[Episode 1830] reward=-52669214.8 actor_loss=0.1145 critic_loss=124500617898.6667 entropy=4.0955 ent_coef=0.001984 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 1840] reward=-51762465.3 actor_loss=0.0980 critic_loss=124772367639.2727 entropy=4.0938 ent_coef=0.001983 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 1840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-439439.1 mean_steps=14.2
|
|
[Episode 1850] reward=-59722464.6 actor_loss=0.1167 critic_loss=133873877811.2000 entropy=4.0884 ent_coef=0.001983 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 1860] reward=-51176514.5 actor_loss=0.0814 critic_loss=128321535089.7778 entropy=4.0928 ent_coef=0.001983 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 1860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-529782.0 mean_steps=11.7
|
|
[Episode 1870] reward=-48545295.4 actor_loss=0.0928 critic_loss=120300817314.9091 entropy=4.0886 ent_coef=0.001983 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 1880] reward=-47383357.8 actor_loss=0.1210 critic_loss=121125639509.3333 entropy=4.0893 ent_coef=0.001983 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 1880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-356324.5 mean_steps=14.6
|
|
[Episode 1890] reward=-52411751.0 actor_loss=0.1283 critic_loss=128231432192.0000 entropy=4.0885 ent_coef=0.001983 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 1900] reward=-73198175.4 actor_loss=0.1258 critic_loss=134873777493.3333 entropy=4.0874 ent_coef=0.001983 approx_kl=0.0024 kl_stop=1 intervention_rate=0.1003 front_blocked=0
|
|
[Eval 1900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-405296.4 mean_steps=14.2
|
|
[Episode 1910] reward=-47082244.0 actor_loss=0.0825 critic_loss=122621119394.9091 entropy=4.0818 ent_coef=0.001983 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 1920] reward=-49329279.0 actor_loss=0.1173 critic_loss=126767891602.2857 entropy=4.0830 ent_coef=0.001983 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 1920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-450997.0 mean_steps=14.4
|
|
[Episode 1930] reward=-48565822.1 actor_loss=0.1446 critic_loss=125770293248.0000 entropy=4.0815 ent_coef=0.001983 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 1940] reward=-54673347.0 actor_loss=0.1124 critic_loss=124999203840.0000 entropy=4.0794 ent_coef=0.001983 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 1940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-574590.9 mean_steps=12.5
|
|
[Episode 1950] reward=-54993073.3 actor_loss=0.1010 critic_loss=126125159219.2000 entropy=4.0782 ent_coef=0.001982 approx_kl=0.0006 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 1960] reward=-60185450.4 actor_loss=0.1555 critic_loss=127062169600.0000 entropy=4.0780 ent_coef=0.001982 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 1960] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-332549.7 mean_steps=15.2
|
|
[Episode 1970] reward=-44679406.5 actor_loss=0.0939 critic_loss=121299503549.2174 entropy=4.0767 ent_coef=0.001982 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 1980] reward=-44211634.3 actor_loss=0.0889 critic_loss=118148429637.8182 entropy=4.0765 ent_coef=0.001982 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 1980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-419304.3 mean_steps=13.3
|
|
[Episode 1990] reward=-60140477.0 actor_loss=0.0969 critic_loss=126555492352.0000 entropy=4.0735 ent_coef=0.001982 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 2000] reward=-54445089.5 actor_loss=0.1058 critic_loss=128727488512.0000 entropy=4.0727 ent_coef=0.001982 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 2000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-549781.5 mean_steps=12.8
|
|
[Episode 2010] reward=-54142735.0 actor_loss=0.1089 critic_loss=126954022229.3333 entropy=4.0728 ent_coef=0.001982 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 2020] reward=-53294698.2 actor_loss=0.1049 critic_loss=125974206327.4667 entropy=4.0707 ent_coef=0.001982 approx_kl=0.0011 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 2020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552614.4 mean_steps=12.7
|
|
[Episode 2030] reward=-62093892.3 actor_loss=0.1485 critic_loss=131182927872.0000 entropy=4.0731 ent_coef=0.001982 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Episode 2040] reward=-52287142.4 actor_loss=0.1167 critic_loss=126350289826.9091 entropy=4.0703 ent_coef=0.001982 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 2040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507730.9 mean_steps=13.2
|
|
[Episode 2050] reward=-49233277.7 actor_loss=0.1156 critic_loss=125271705442.4615 entropy=4.0699 ent_coef=0.001982 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 2060] reward=-55887223.0 actor_loss=0.1422 critic_loss=128107844403.2000 entropy=4.0738 ent_coef=0.001981 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 2060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409075.3 mean_steps=14.8
|
|
[Episode 2070] reward=-65710910.6 actor_loss=0.1246 critic_loss=133782807552.0000 entropy=4.0754 ent_coef=0.001981 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Episode 2080] reward=-61032695.5 actor_loss=0.1073 critic_loss=130958512128.0000 entropy=4.0760 ent_coef=0.001981 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 2080] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-651565.6 mean_steps=10.9
|
|
[Episode 2090] reward=-49726474.8 actor_loss=0.0971 critic_loss=126003790454.1538 entropy=4.0716 ent_coef=0.001981 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 2100] reward=-50638503.9 actor_loss=0.1127 critic_loss=126745647331.5556 entropy=4.0709 ent_coef=0.001981 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 2100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402742.3 mean_steps=14.2
|
|
[Episode 2110] reward=-55935748.2 actor_loss=0.1396 critic_loss=126883635200.0000 entropy=4.0709 ent_coef=0.001981 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 2120] reward=-45660275.9 actor_loss=0.1101 critic_loss=122743617063.3846 entropy=4.0722 ent_coef=0.001981 approx_kl=0.0001 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 2120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519735.9 mean_steps=13.4
|
|
[Episode 2130] reward=-48422532.7 actor_loss=0.1346 critic_loss=124192468406.8571 entropy=4.0696 ent_coef=0.001981 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 2140] reward=-51000114.2 actor_loss=0.0955 critic_loss=123832084322.4615 entropy=4.0681 ent_coef=0.001981 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 2140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-368900.4 mean_steps=14.1
|
|
[Episode 2150] reward=-58041864.2 actor_loss=0.1125 critic_loss=125123690496.0000 entropy=4.0670 ent_coef=0.001981 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 2160] reward=-56514291.4 actor_loss=0.1128 critic_loss=127018604544.0000 entropy=4.0701 ent_coef=0.001981 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 2160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-472572.9 mean_steps=12.2
|
|
[Episode 2170] reward=-43878984.8 actor_loss=0.1224 critic_loss=119477238897.7778 entropy=4.0692 ent_coef=0.001980 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 2180] reward=-61082126.5 actor_loss=0.1225 critic_loss=132297256960.0000 entropy=4.0678 ent_coef=0.001980 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 2180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-555149.9 mean_steps=11.8
|
|
[Episode 2190] reward=-55729889.6 actor_loss=0.1230 critic_loss=119404581888.0000 entropy=4.0697 ent_coef=0.001980 approx_kl=0.0006 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 2200] reward=-61560447.8 actor_loss=0.1077 critic_loss=133212755558.4000 entropy=4.0676 ent_coef=0.001980 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 2200] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-359403.5 mean_steps=15.3
|
|
[Episode 2210] reward=-65295769.3 actor_loss=0.1342 critic_loss=132900278272.0000 entropy=4.0619 ent_coef=0.001980 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Episode 2220] reward=-43088229.5 actor_loss=0.0813 critic_loss=120759542784.0000 entropy=4.0601 ent_coef=0.001980 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Eval 2220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-456426.1 mean_steps=12.8
|
|
[Episode 2230] reward=-58843539.9 actor_loss=0.1300 critic_loss=133060039475.2000 entropy=4.0622 ent_coef=0.001980 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 2240] reward=-62638631.2 actor_loss=0.1333 critic_loss=130586166737.4545 entropy=4.0597 ent_coef=0.001980 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 2240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476388.7 mean_steps=13.7
|
|
[Episode 2250] reward=-52686355.0 actor_loss=0.0998 critic_loss=126380361728.0000 entropy=4.0548 ent_coef=0.001980 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 2260] reward=-43566378.6 actor_loss=0.1149 critic_loss=118773381997.7143 entropy=4.0528 ent_coef=0.001980 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 2260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-422102.0 mean_steps=13.4
|
|
[Episode 2270] reward=-54795115.3 actor_loss=0.1246 critic_loss=127322658343.3846 entropy=4.0531 ent_coef=0.001980 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 2280] reward=-50414976.2 actor_loss=0.0921 critic_loss=123753961335.4667 entropy=4.0505 ent_coef=0.001979 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 2280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481236.7 mean_steps=13.1
|
|
[Episode 2290] reward=-51559816.3 actor_loss=0.1360 critic_loss=119673387235.5556 entropy=4.0513 ent_coef=0.001979 approx_kl=0.0007 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 2300] reward=-57324409.0 actor_loss=0.1196 critic_loss=125904997580.8000 entropy=4.0512 ent_coef=0.001979 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 2300] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-622378.5 mean_steps=11.7
|
|
[Episode 2310] reward=-56518793.6 actor_loss=0.1224 critic_loss=126660173824.0000 entropy=4.0516 ent_coef=0.001979 approx_kl=0.0010 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 2320] reward=-53820510.3 actor_loss=0.1110 critic_loss=122521085542.4000 entropy=4.0457 ent_coef=0.001979 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 2320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-540431.0 mean_steps=11.9
|
|
[Episode 2330] reward=-49829354.0 actor_loss=0.1015 critic_loss=119828327537.7778 entropy=4.0464 ent_coef=0.001979 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 2340] reward=-58652924.8 actor_loss=0.0787 critic_loss=131572744192.0000 entropy=4.0478 ent_coef=0.001979 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 2340] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-453780.4 mean_steps=12.8
|
|
[Episode 2350] reward=-55311126.7 actor_loss=0.1182 critic_loss=123925835502.9333 entropy=4.0468 ent_coef=0.001979 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 2360] reward=-51141658.2 actor_loss=0.1247 critic_loss=123763013632.0000 entropy=4.0484 ent_coef=0.001979 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 2360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-594342.4 mean_steps=12.2
|
|
[Episode 2370] reward=-56353033.7 actor_loss=0.1057 critic_loss=125378898850.9091 entropy=4.0471 ent_coef=0.001979 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 2380] reward=-50565828.6 actor_loss=0.0879 critic_loss=124535604370.2857 entropy=4.0436 ent_coef=0.001979 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 2380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-377191.0 mean_steps=13.8
|
|
[Episode 2390] reward=-53931643.9 actor_loss=0.1188 critic_loss=125877915963.0769 entropy=4.0431 ent_coef=0.001978 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 2400] reward=-45988564.9 actor_loss=0.0938 critic_loss=122517406659.7647 entropy=4.0429 ent_coef=0.001978 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 2400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540208.7 mean_steps=12.4
|
|
[Episode 2410] reward=-52063911.3 actor_loss=0.1021 critic_loss=121159997667.5556 entropy=4.0416 ent_coef=0.001978 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 2420] reward=-49917223.2 actor_loss=0.1078 critic_loss=124858667463.1111 entropy=4.0429 ent_coef=0.001978 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 2420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-552470.9 mean_steps=12.0
|
|
[Episode 2430] reward=-54173825.2 actor_loss=0.0815 critic_loss=126419917482.6667 entropy=4.0415 ent_coef=0.001978 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 2440] reward=-49243107.1 actor_loss=0.1163 critic_loss=122187856430.5455 entropy=4.0431 ent_coef=0.001978 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 2440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469371.4 mean_steps=13.1
|
|
[Episode 2450] reward=-50567540.5 actor_loss=0.1151 critic_loss=120095110875.4286 entropy=4.0403 ent_coef=0.001978 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 2460] reward=-62124179.9 actor_loss=0.1174 critic_loss=131110531657.1429 entropy=4.0394 ent_coef=0.001978 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 2460] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-623421.1 mean_steps=11.6
|
|
[Episode 2470] reward=-39304688.8 actor_loss=0.0965 critic_loss=115660618865.7778 entropy=4.0385 ent_coef=0.001978 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 2480] reward=-58989419.9 actor_loss=0.0773 critic_loss=126975424325.8182 entropy=4.0406 ent_coef=0.001978 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 2480] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-663475.7 mean_steps=12.0
|
|
[Episode 2490] reward=-40447094.1 actor_loss=0.1187 critic_loss=116279048005.8182 entropy=4.0398 ent_coef=0.001978 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 2500] reward=-41532839.0 actor_loss=0.1232 critic_loss=116452249972.3636 entropy=4.0365 ent_coef=0.001978 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 2500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-595073.8 mean_steps=11.6
|
|
[Episode 2510] reward=-50294786.3 actor_loss=0.1272 critic_loss=121396253491.2000 entropy=4.0312 ent_coef=0.001977 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 2520] reward=-54257444.0 actor_loss=0.0927 critic_loss=125168168504.8889 entropy=4.0284 ent_coef=0.001977 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Eval 2520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-452189.8 mean_steps=12.0
|
|
[Episode 2530] reward=-45174235.7 actor_loss=0.0947 critic_loss=122619846656.0000 entropy=4.0335 ent_coef=0.001977 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Episode 2540] reward=-42868012.5 actor_loss=0.1463 critic_loss=119897640960.0000 entropy=4.0336 ent_coef=0.001977 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 2540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-445128.3 mean_steps=15.4
|
|
[Episode 2550] reward=-51417073.7 actor_loss=0.1363 critic_loss=123832698880.0000 entropy=4.0329 ent_coef=0.001977 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 2560] reward=-50062093.6 actor_loss=0.1308 critic_loss=122101729280.0000 entropy=4.0336 ent_coef=0.001977 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 2560] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-355435.2 mean_steps=15.5
|
|
[Episode 2570] reward=-44099546.1 actor_loss=0.0877 critic_loss=122637892812.8000 entropy=4.0326 ent_coef=0.001977 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 2580] reward=-55019348.2 actor_loss=0.1088 critic_loss=125609609095.5294 entropy=4.0302 ent_coef=0.001977 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 2580] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-420774.1 mean_steps=15.1
|
|
[Episode 2590] reward=-59780119.6 actor_loss=0.1111 critic_loss=130767817176.6154 entropy=4.0314 ent_coef=0.001977 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 2600] reward=-54060538.6 actor_loss=0.1392 critic_loss=125580928000.0000 entropy=4.0296 ent_coef=0.001977 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 2600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-502594.5 mean_steps=12.4
|
|
[Episode 2610] reward=-52034769.5 actor_loss=0.1102 critic_loss=127406380919.4667 entropy=4.0284 ent_coef=0.001977 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 2620] reward=-49248357.3 actor_loss=0.0880 critic_loss=123252037222.4000 entropy=4.0250 ent_coef=0.001976 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 2620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409981.7 mean_steps=14.4
|
|
[Episode 2630] reward=-50220787.1 actor_loss=0.0995 critic_loss=124183640473.6000 entropy=4.0223 ent_coef=0.001976 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 2640] reward=-57936846.7 actor_loss=0.1052 critic_loss=131243006634.6667 entropy=4.0205 ent_coef=0.001976 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 2640] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-342893.6 mean_steps=14.7
|
|
[Episode 2650] reward=-45217706.0 actor_loss=0.1149 critic_loss=123880671004.4444 entropy=4.0226 ent_coef=0.001976 approx_kl=0.0006 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 2660] reward=-51681666.5 actor_loss=0.0771 critic_loss=122814966784.0000 entropy=4.0213 ent_coef=0.001976 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 2660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-412672.3 mean_steps=13.4
|
|
[Episode 2670] reward=-52930455.6 actor_loss=0.1111 critic_loss=127670920078.2222 entropy=4.0190 ent_coef=0.001976 approx_kl=0.0006 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 2680] reward=-50144879.5 actor_loss=0.1126 critic_loss=125173379072.0000 entropy=4.0192 ent_coef=0.001976 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 2680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433724.6 mean_steps=14.7
|
|
[Episode 2690] reward=-48487123.0 actor_loss=0.0781 critic_loss=125011536008.5333 entropy=4.0199 ent_coef=0.001976 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 2700] reward=-50399960.3 actor_loss=0.1335 critic_loss=122202152960.0000 entropy=4.0204 ent_coef=0.001976 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 2700] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-327610.8 mean_steps=16.2
|
|
[Episode 2710] reward=-52123507.2 actor_loss=0.0833 critic_loss=122174562304.0000 entropy=4.0204 ent_coef=0.001976 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 2720] reward=-58920738.8 actor_loss=0.1109 critic_loss=130330937530.1818 entropy=4.0193 ent_coef=0.001976 approx_kl=0.0005 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 2720] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-380212.6 mean_steps=14.9
|
|
[Episode 2730] reward=-53908585.1 actor_loss=0.1320 critic_loss=128066884403.2000 entropy=4.0167 ent_coef=0.001975 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 2740] reward=-47189970.3 actor_loss=0.1013 critic_loss=123760619520.0000 entropy=4.0121 ent_coef=0.001975 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 2740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-460077.6 mean_steps=13.1
|
|
[Episode 2750] reward=-46129478.5 actor_loss=0.1275 critic_loss=123510658935.4667 entropy=4.0129 ent_coef=0.001975 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 2760] reward=-44856457.2 actor_loss=0.0922 critic_loss=123155338581.3333 entropy=4.0126 ent_coef=0.001975 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0658 front_blocked=0
|
|
[Eval 2760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500678.4 mean_steps=13.2
|
|
[Episode 2770] reward=-51321074.1 actor_loss=0.1168 critic_loss=121895664298.6667 entropy=4.0157 ent_coef=0.001975 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 2780] reward=-49727859.5 actor_loss=0.1429 critic_loss=121420908544.0000 entropy=4.0122 ent_coef=0.001975 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 2780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-532484.1 mean_steps=11.9
|
|
[Episode 2790] reward=-49693743.4 actor_loss=0.1059 critic_loss=122074920277.3333 entropy=4.0093 ent_coef=0.001975 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 2800] reward=-59701001.9 actor_loss=0.1173 critic_loss=132261709141.3333 entropy=4.0028 ent_coef=0.001975 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 2800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485297.6 mean_steps=13.0
|
|
[Episode 2810] reward=-51180572.9 actor_loss=0.1659 critic_loss=120825908792.8889 entropy=4.0008 ent_coef=0.001975 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 2820] reward=-55768233.6 actor_loss=0.1253 critic_loss=126803173376.0000 entropy=4.0047 ent_coef=0.001975 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 2820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-522945.9 mean_steps=12.6
|
|
[Episode 2830] reward=-46600860.8 actor_loss=0.1074 critic_loss=118201279374.2222 entropy=4.0024 ent_coef=0.001975 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 2840] reward=-52573310.3 actor_loss=0.0992 critic_loss=122173873590.8571 entropy=4.0020 ent_coef=0.001974 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 2840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467127.3 mean_steps=14.2
|
|
[Episode 2850] reward=-50698600.0 actor_loss=0.1291 critic_loss=122945580236.8000 entropy=4.0016 ent_coef=0.001974 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 2860] reward=-54891439.9 actor_loss=0.1149 critic_loss=123937097864.5333 entropy=4.0027 ent_coef=0.001974 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 2860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-441844.9 mean_steps=12.1
|
|
[Episode 2870] reward=-48606112.3 actor_loss=0.0976 critic_loss=119690994408.7273 entropy=3.9984 ent_coef=0.001974 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 2880] reward=-52425688.2 actor_loss=0.0868 critic_loss=120717084672.0000 entropy=4.0010 ent_coef=0.001974 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 2880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-522662.1 mean_steps=13.6
|
|
[Episode 2890] reward=-54627539.5 actor_loss=0.1117 critic_loss=123964825600.0000 entropy=3.9980 ent_coef=0.001974 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 2900] reward=-45840944.2 actor_loss=0.1026 critic_loss=119868836522.6667 entropy=3.9997 ent_coef=0.001974 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 2900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559463.7 mean_steps=12.3
|
|
[Episode 2910] reward=-52345762.2 actor_loss=0.1071 critic_loss=121366340494.2222 entropy=4.0041 ent_coef=0.001974 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 2920] reward=-50673725.5 actor_loss=0.1290 critic_loss=121146596010.6667 entropy=4.0037 ent_coef=0.001974 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 2920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-403693.0 mean_steps=13.6
|
|
[Episode 2930] reward=-62884705.2 actor_loss=0.1458 critic_loss=130363504453.8182 entropy=4.0076 ent_coef=0.001974 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0977 front_blocked=0
|
|
[Episode 2940] reward=-48073551.3 actor_loss=0.1004 critic_loss=121325615104.0000 entropy=4.0048 ent_coef=0.001974 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 2940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-632408.2 mean_steps=11.8
|
|
[Episode 2950] reward=-53530346.9 actor_loss=0.1204 critic_loss=125036408246.8571 entropy=4.0019 ent_coef=0.001973 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 2960] reward=-58880816.3 actor_loss=0.0957 critic_loss=130902640932.5714 entropy=4.0059 ent_coef=0.001973 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 2960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431888.3 mean_steps=13.7
|
|
[Episode 2970] reward=-41468565.8 actor_loss=0.1221 critic_loss=121918638080.0000 entropy=4.0043 ent_coef=0.001973 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 2980] reward=-39395067.6 actor_loss=0.0915 critic_loss=116690096128.0000 entropy=4.0068 ent_coef=0.001973 approx_kl=0.0002 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 2980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469768.6 mean_steps=12.9
|
|
[Episode 2990] reward=-63991620.9 actor_loss=0.1300 critic_loss=132328393728.0000 entropy=4.0103 ent_coef=0.001973 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 3000] reward=-59537215.3 actor_loss=0.1079 critic_loss=129644019712.0000 entropy=4.0082 ent_coef=0.001973 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 3000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-497893.3 mean_steps=12.5
|
|
[Episode 3010] reward=-51555384.8 actor_loss=0.1273 critic_loss=121689305088.0000 entropy=4.0099 ent_coef=0.001973 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 3020] reward=-38381983.1 actor_loss=0.0887 critic_loss=118569108821.3333 entropy=4.0080 ent_coef=0.001973 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Eval 3020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-355732.5 mean_steps=15.9
|
|
[Episode 3030] reward=-54641825.8 actor_loss=0.1044 critic_loss=124966982997.3333 entropy=4.0086 ent_coef=0.001973 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 3040] reward=-46032005.8 actor_loss=0.0965 critic_loss=121649736089.6000 entropy=4.0051 ent_coef=0.001973 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 3040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-336860.6 mean_steps=14.8
|
|
[Episode 3050] reward=-50755127.6 actor_loss=0.1041 critic_loss=122138318711.4667 entropy=4.0013 ent_coef=0.001973 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 3060] reward=-49805425.5 actor_loss=0.1040 critic_loss=121791428608.0000 entropy=4.0057 ent_coef=0.001972 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 3060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-606873.6 mean_steps=12.5
|
|
[Episode 3070] reward=-46469870.3 actor_loss=0.0691 critic_loss=123167624098.9091 entropy=4.0060 ent_coef=0.001972 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 3080] reward=-52548106.4 actor_loss=0.1107 critic_loss=128171810816.0000 entropy=4.0064 ent_coef=0.001972 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 3080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490227.3 mean_steps=13.1
|
|
[Episode 3090] reward=-57443887.7 actor_loss=0.1184 critic_loss=125879250124.8000 entropy=4.0063 ent_coef=0.001972 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 3100] reward=-41792372.5 actor_loss=0.1179 critic_loss=118410251170.9091 entropy=4.0035 ent_coef=0.001972 approx_kl=-0.0007 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 3100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-438260.4 mean_steps=13.8
|
|
[Episode 3110] reward=-57901885.8 actor_loss=0.1602 critic_loss=125961866581.3333 entropy=4.0010 ent_coef=0.001972 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 3120] reward=-48955394.5 actor_loss=0.1175 critic_loss=125117228178.2857 entropy=4.0007 ent_coef=0.001972 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 3120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-587412.2 mean_steps=13.8
|
|
[Episode 3130] reward=-49921625.6 actor_loss=0.1071 critic_loss=124588500992.0000 entropy=4.0021 ent_coef=0.001972 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 3140] reward=-47694478.5 actor_loss=0.0981 critic_loss=123262646135.4667 entropy=4.0021 ent_coef=0.001972 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 3140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-541876.0 mean_steps=11.8
|
|
[Episode 3150] reward=-58968157.5 actor_loss=0.1082 critic_loss=126563526842.1818 entropy=4.0004 ent_coef=0.001972 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 3160] reward=-43430056.4 actor_loss=0.0933 critic_loss=121295697237.3333 entropy=4.0042 ent_coef=0.001972 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 3160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490926.5 mean_steps=13.3
|
|
[Episode 3170] reward=-64155514.1 actor_loss=0.1070 critic_loss=135769002666.6667 entropy=4.0044 ent_coef=0.001971 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 3180] reward=-51165459.8 actor_loss=0.1054 critic_loss=124274513627.4286 entropy=4.0059 ent_coef=0.001971 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 3180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-472307.5 mean_steps=13.9
|
|
[Episode 3190] reward=-49651322.8 actor_loss=0.1106 critic_loss=120604138837.3333 entropy=4.0070 ent_coef=0.001971 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 3200] reward=-58162933.7 actor_loss=0.0916 critic_loss=128360886272.0000 entropy=4.0079 ent_coef=0.001971 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 3200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459531.0 mean_steps=13.7
|
|
[Episode 3210] reward=-59609600.9 actor_loss=0.1244 critic_loss=128702267392.0000 entropy=4.0089 ent_coef=0.001971 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 3220] reward=-62177925.4 actor_loss=0.1139 critic_loss=130738056396.8000 entropy=4.0099 ent_coef=0.001971 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 3220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-415618.2 mean_steps=14.9
|
|
[Episode 3230] reward=-40359930.4 actor_loss=0.1205 critic_loss=115461368685.7143 entropy=4.0071 ent_coef=0.001971 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 3240] reward=-57237496.7 actor_loss=0.1164 critic_loss=127763514514.2857 entropy=4.0069 ent_coef=0.001971 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 3240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-487041.9 mean_steps=12.2
|
|
[Episode 3250] reward=-50017133.9 actor_loss=0.0794 critic_loss=123443225088.0000 entropy=4.0109 ent_coef=0.001971 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 3260] reward=-45188746.8 actor_loss=0.0941 critic_loss=122967789568.0000 entropy=4.0129 ent_coef=0.001971 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 3260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410944.9 mean_steps=14.1
|
|
[Episode 3270] reward=-58777492.9 actor_loss=0.1188 critic_loss=131317409280.0000 entropy=4.0137 ent_coef=0.001971 approx_kl=0.0001 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 3280] reward=-48603424.6 actor_loss=0.1138 critic_loss=119577376995.5556 entropy=4.0132 ent_coef=0.001970 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 3280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477689.3 mean_steps=13.0
|
|
[Episode 3290] reward=-57339362.0 actor_loss=0.1578 critic_loss=128093192192.0000 entropy=4.0157 ent_coef=0.001970 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Episode 3300] reward=-43934238.1 actor_loss=0.1418 critic_loss=119660635477.3333 entropy=4.0140 ent_coef=0.001970 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 3300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-518967.9 mean_steps=12.6
|
|
[Episode 3310] reward=-64941027.1 actor_loss=0.1154 critic_loss=131552431104.0000 entropy=4.0172 ent_coef=0.001970 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0990 front_blocked=0
|
|
[Episode 3320] reward=-55126825.5 actor_loss=0.1142 critic_loss=123169638586.1818 entropy=4.0180 ent_coef=0.001970 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 3320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-453566.4 mean_steps=13.1
|
|
[Episode 3330] reward=-62480334.5 actor_loss=0.1102 critic_loss=133610450571.6364 entropy=4.0182 ent_coef=0.001970 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 3340] reward=-52874496.5 actor_loss=0.0944 critic_loss=125437231104.0000 entropy=4.0186 ent_coef=0.001970 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 3340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424750.0 mean_steps=14.3
|
|
[Episode 3350] reward=-50322465.9 actor_loss=0.1305 critic_loss=121391657415.1111 entropy=4.0181 ent_coef=0.001970 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 3360] reward=-45159229.9 actor_loss=0.1210 critic_loss=115509511633.4545 entropy=4.0200 ent_coef=0.001970 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 3360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-431209.7 mean_steps=12.8
|
|
[Episode 3370] reward=-52892901.3 actor_loss=0.1119 critic_loss=122982649133.1765 entropy=4.0199 ent_coef=0.001970 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 3380] reward=-50959350.4 actor_loss=0.1057 critic_loss=125468813312.0000 entropy=4.0198 ent_coef=0.001970 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 3380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512694.9 mean_steps=13.3
|
|
[Episode 3390] reward=-66823675.3 actor_loss=0.0969 critic_loss=132824053077.3333 entropy=4.0174 ent_coef=0.001969 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 3400] reward=-55520588.1 actor_loss=0.1077 critic_loss=126657722525.5385 entropy=4.0194 ent_coef=0.001969 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 3400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-403439.0 mean_steps=13.9
|
|
[Episode 3410] reward=-58206031.9 actor_loss=0.1154 critic_loss=126680942080.0000 entropy=4.0159 ent_coef=0.001969 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 3420] reward=-53357994.6 actor_loss=0.0965 critic_loss=126444849338.1818 entropy=4.0128 ent_coef=0.001969 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 3420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-350937.1 mean_steps=14.7
|
|
[Episode 3430] reward=-50463505.0 actor_loss=0.1255 critic_loss=125702019630.5455 entropy=4.0116 ent_coef=0.001969 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 3440] reward=-54823017.9 actor_loss=0.1081 critic_loss=126136175001.6000 entropy=4.0086 ent_coef=0.001969 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 3440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-522225.7 mean_steps=11.9
|
|
[Episode 3450] reward=-48940640.2 actor_loss=0.1126 critic_loss=124936071010.4615 entropy=4.0060 ent_coef=0.001969 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 3460] reward=-42972385.3 actor_loss=0.0978 critic_loss=118420300520.7273 entropy=4.0046 ent_coef=0.001969 approx_kl=0.0010 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 3460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-398666.2 mean_steps=14.2
|
|
[Episode 3470] reward=-56914715.8 actor_loss=0.1317 critic_loss=124584401683.6923 entropy=4.0024 ent_coef=0.001969 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 3480] reward=-50120971.8 actor_loss=0.0992 critic_loss=124541766314.6667 entropy=4.0035 ent_coef=0.001969 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 3480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-496130.4 mean_steps=13.0
|
|
[Episode 3490] reward=-52783596.9 actor_loss=0.1109 critic_loss=129649847947.6364 entropy=4.0024 ent_coef=0.001969 approx_kl=-0.0006 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 3500] reward=-52023506.8 actor_loss=0.1166 critic_loss=126553369419.2941 entropy=3.9995 ent_coef=0.001969 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 3500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458360.2 mean_steps=13.7
|
|
[Episode 3510] reward=-64581931.2 actor_loss=0.1093 critic_loss=130188970914.9091 entropy=3.9988 ent_coef=0.001968 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 3520] reward=-47779495.5 actor_loss=0.1375 critic_loss=114882877849.6000 entropy=4.0012 ent_coef=0.001968 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 3520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-461998.3 mean_steps=12.8
|
|
[Episode 3530] reward=-66566874.2 actor_loss=0.1122 critic_loss=138893179904.0000 entropy=4.0018 ent_coef=0.001968 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 3540] reward=-48907104.3 actor_loss=0.1145 critic_loss=124603504932.5714 entropy=4.0009 ent_coef=0.001968 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 3540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544535.6 mean_steps=12.7
|
|
[Episode 3550] reward=-52691710.3 actor_loss=0.1200 critic_loss=127082076160.0000 entropy=4.0014 ent_coef=0.001968 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 3560] reward=-52077162.0 actor_loss=0.0971 critic_loss=127200971776.0000 entropy=4.0000 ent_coef=0.001968 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 3560] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-435090.9 mean_steps=12.0
|
|
[Episode 3570] reward=-63215592.2 actor_loss=0.1184 critic_loss=131948461260.8000 entropy=4.0034 ent_coef=0.001968 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 3580] reward=-53115660.8 actor_loss=0.1235 critic_loss=129406696106.6667 entropy=4.0055 ent_coef=0.001968 approx_kl=0.0001 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 3580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474900.5 mean_steps=13.7
|
|
[Episode 3590] reward=-61345634.7 actor_loss=0.1558 critic_loss=131320489106.2857 entropy=4.0035 ent_coef=0.001968 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 3600] reward=-55167199.2 actor_loss=0.1225 critic_loss=125849712347.4286 entropy=4.0052 ent_coef=0.001968 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 3600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434710.2 mean_steps=14.0
|
|
[Episode 3610] reward=-41548972.9 actor_loss=0.0956 critic_loss=119660633367.2727 entropy=4.0070 ent_coef=0.001968 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 3620] reward=-57062632.1 actor_loss=0.1076 critic_loss=129537421767.1111 entropy=4.0062 ent_coef=0.001967 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 3620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-431917.6 mean_steps=12.7
|
|
[Episode 3630] reward=-48336771.4 actor_loss=0.1063 critic_loss=121765080772.9231 entropy=4.0024 ent_coef=0.001967 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 3640] reward=-51879205.2 actor_loss=0.1095 critic_loss=125093343232.0000 entropy=4.0011 ent_coef=0.001967 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 3640] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-287154.2 mean_steps=15.8
|
|
[Episode 3650] reward=-47067482.7 actor_loss=0.0828 critic_loss=120392764984.8889 entropy=4.0003 ent_coef=0.001967 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 3660] reward=-60724169.2 actor_loss=0.1207 critic_loss=129927896405.3333 entropy=3.9995 ent_coef=0.001967 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 3660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-603355.7 mean_steps=13.1
|
|
[Episode 3670] reward=-45388824.4 actor_loss=0.1175 critic_loss=120177560780.8000 entropy=4.0019 ent_coef=0.001967 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 3680] reward=-49975262.2 actor_loss=0.0964 critic_loss=128484317593.6000 entropy=4.0006 ent_coef=0.001967 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 3680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-558633.4 mean_steps=11.2
|
|
[Episode 3690] reward=-65581983.0 actor_loss=0.1277 critic_loss=132354442035.2000 entropy=3.9976 ent_coef=0.001967 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 3700] reward=-55117280.1 actor_loss=0.1466 critic_loss=130549163622.4000 entropy=4.0001 ent_coef=0.001967 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 3700] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-256822.5 mean_steps=15.4
|
|
[Episode 3710] reward=-53078039.8 actor_loss=0.1230 critic_loss=128164443316.7059 entropy=4.0002 ent_coef=0.001967 approx_kl=0.0010 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 3720] reward=-47928977.8 actor_loss=0.1042 critic_loss=120288072362.6667 entropy=3.9989 ent_coef=0.001967 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 3720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-499254.7 mean_steps=12.3
|
|
[Episode 3730] reward=-43247354.5 actor_loss=0.1148 critic_loss=119053631488.0000 entropy=4.0021 ent_coef=0.001966 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 3740] reward=-57023598.7 actor_loss=0.1448 critic_loss=128190624768.0000 entropy=4.0014 ent_coef=0.001966 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 3740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434931.5 mean_steps=14.4
|
|
[Episode 3750] reward=-48611040.3 actor_loss=0.1198 critic_loss=123198159257.6000 entropy=4.0003 ent_coef=0.001966 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 3760] reward=-51463437.7 actor_loss=0.1429 critic_loss=121070823716.5714 entropy=3.9966 ent_coef=0.001966 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 3760] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-740657.5 mean_steps=9.8
|
|
[Episode 3770] reward=-51595851.7 actor_loss=0.0888 critic_loss=126005392384.0000 entropy=3.9969 ent_coef=0.001966 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 3780] reward=-50773565.9 actor_loss=0.1186 critic_loss=121684337550.2222 entropy=3.9977 ent_coef=0.001966 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 3780] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-319889.0 mean_steps=15.8
|
|
[Episode 3790] reward=-42287307.3 actor_loss=0.1056 critic_loss=122614908050.2857 entropy=3.9977 ent_coef=0.001966 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 3800] reward=-54748897.9 actor_loss=0.1278 critic_loss=124377078897.7778 entropy=3.9985 ent_coef=0.001966 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 3800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-453450.8 mean_steps=12.9
|
|
[Episode 3810] reward=-56458413.2 actor_loss=0.1099 critic_loss=131282965299.2000 entropy=3.9977 ent_coef=0.001966 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 3820] reward=-49762316.7 actor_loss=0.1437 critic_loss=124958244864.0000 entropy=3.9949 ent_coef=0.001966 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 3820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440976.7 mean_steps=13.5
|
|
[Episode 3830] reward=-56517645.8 actor_loss=0.1244 critic_loss=127618323456.0000 entropy=3.9927 ent_coef=0.001966 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 3840] reward=-57085518.9 actor_loss=0.1199 critic_loss=132727643249.7778 entropy=3.9918 ent_coef=0.001965 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 3840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517097.5 mean_steps=12.9
|
|
[Episode 3850] reward=-57643191.8 actor_loss=0.1192 critic_loss=126434811084.8000 entropy=3.9895 ent_coef=0.001965 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Episode 3860] reward=-50121296.6 actor_loss=0.1415 critic_loss=122569605120.0000 entropy=3.9909 ent_coef=0.001965 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 3860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435648.2 mean_steps=14.3
|
|
[Episode 3870] reward=-51242485.4 actor_loss=0.0946 critic_loss=122053392942.5455 entropy=3.9877 ent_coef=0.001965 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 3880] reward=-52112991.3 actor_loss=0.0967 critic_loss=127230316171.6364 entropy=3.9872 ent_coef=0.001965 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 3880] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494855.1 mean_steps=12.9
|
|
[Episode 3890] reward=-47373936.2 actor_loss=0.1263 critic_loss=121387174074.1818 entropy=3.9843 ent_coef=0.001965 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 3900] reward=-40951980.0 actor_loss=0.1232 critic_loss=115567409152.0000 entropy=3.9874 ent_coef=0.001965 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 3900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-458313.5 mean_steps=12.9
|
|
[Episode 3910] reward=-51136622.7 actor_loss=0.1271 critic_loss=123029088098.4615 entropy=3.9889 ent_coef=0.001965 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 3920] reward=-49373970.5 actor_loss=0.0945 critic_loss=124972761088.0000 entropy=3.9904 ent_coef=0.001965 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 3920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445253.3 mean_steps=14.2
|
|
[Episode 3930] reward=-53065550.4 actor_loss=0.1273 critic_loss=122517039786.6667 entropy=3.9903 ent_coef=0.001965 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 3940] reward=-52887215.3 actor_loss=0.1033 critic_loss=120276018790.4000 entropy=3.9900 ent_coef=0.001965 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 3940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563072.9 mean_steps=12.6
|
|
[Episode 3950] reward=-56352270.1 actor_loss=0.1164 critic_loss=127333538201.6000 entropy=3.9871 ent_coef=0.001964 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 3960] reward=-56538673.5 actor_loss=0.1432 critic_loss=126239149494.8571 entropy=3.9878 ent_coef=0.001964 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 3960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-568266.0 mean_steps=13.2
|
|
[Episode 3970] reward=-57036280.4 actor_loss=0.1202 critic_loss=122761039416.8889 entropy=3.9867 ent_coef=0.001964 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Episode 3980] reward=-55147595.5 actor_loss=0.1074 critic_loss=125078251520.0000 entropy=3.9873 ent_coef=0.001964 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 3980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-569037.3 mean_steps=12.1
|
|
[Episode 3990] reward=-43855940.5 actor_loss=0.0807 critic_loss=115770364359.1111 entropy=3.9840 ent_coef=0.001964 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 4000] reward=-49578967.0 actor_loss=0.0901 critic_loss=122585444667.0769 entropy=3.9818 ent_coef=0.001964 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 4000] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-633494.6 mean_steps=11.0
|
|
[Episode 4010] reward=-38950315.0 actor_loss=0.1143 critic_loss=117159870464.0000 entropy=3.9784 ent_coef=0.001964 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 4020] reward=-54063098.8 actor_loss=0.1318 critic_loss=122193571384.8889 entropy=3.9745 ent_coef=0.001964 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Eval 4020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-439174.1 mean_steps=13.6
|
|
[Episode 4030] reward=-45129759.8 actor_loss=0.1290 critic_loss=117193502720.0000 entropy=3.9721 ent_coef=0.001964 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 4040] reward=-61704017.5 actor_loss=0.1331 critic_loss=129433222348.8000 entropy=3.9709 ent_coef=0.001964 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 4040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-587425.2 mean_steps=12.0
|
|
[Episode 4050] reward=-56089497.2 actor_loss=0.0919 critic_loss=130244809289.1429 entropy=3.9663 ent_coef=0.001964 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 4060] reward=-64701710.8 actor_loss=0.1042 critic_loss=137244062151.1111 entropy=3.9611 ent_coef=0.001963 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 4060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-428752.2 mean_steps=13.5
|
|
[Episode 4070] reward=-52457174.9 actor_loss=0.1251 critic_loss=123634231296.0000 entropy=3.9603 ent_coef=0.001963 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 4080] reward=-61070101.6 actor_loss=0.1135 critic_loss=133592495718.4000 entropy=3.9597 ent_coef=0.001963 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 4080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-615361.0 mean_steps=11.5
|
|
[Episode 4090] reward=-62329124.5 actor_loss=0.1676 critic_loss=133116962406.4000 entropy=3.9626 ent_coef=0.001963 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 4100] reward=-47469232.0 actor_loss=0.1077 critic_loss=124473030144.0000 entropy=3.9610 ent_coef=0.001963 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 4100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402267.4 mean_steps=13.7
|
|
[Episode 4110] reward=-65173031.3 actor_loss=0.1503 critic_loss=133073345194.6667 entropy=3.9627 ent_coef=0.001963 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Episode 4120] reward=-48948437.7 actor_loss=0.1025 critic_loss=122253287424.0000 entropy=3.9578 ent_coef=0.001963 approx_kl=-0.0000 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 4120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-501837.1 mean_steps=13.9
|
|
[Episode 4130] reward=-46647626.2 actor_loss=0.1132 critic_loss=121381733522.2857 entropy=3.9549 ent_coef=0.001963 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 4140] reward=-50893348.8 actor_loss=0.1255 critic_loss=124305162240.0000 entropy=3.9561 ent_coef=0.001963 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 4140] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-301173.6 mean_steps=16.2
|
|
[Episode 4150] reward=-52140870.1 actor_loss=0.1392 critic_loss=125276219392.0000 entropy=3.9578 ent_coef=0.001963 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Episode 4160] reward=-62576880.1 actor_loss=0.1311 critic_loss=128324194759.1111 entropy=3.9584 ent_coef=0.001963 approx_kl=0.0011 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 4160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-602118.9 mean_steps=12.1
|
|
[Episode 4170] reward=-53834738.6 actor_loss=0.1042 critic_loss=124309381120.0000 entropy=3.9589 ent_coef=0.001962 approx_kl=-0.0008 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 4180] reward=-48210729.0 actor_loss=0.1474 critic_loss=123462072027.4286 entropy=3.9589 ent_coef=0.001962 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 4180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539020.4 mean_steps=12.4
|
|
[Episode 4190] reward=-52460159.0 actor_loss=0.0932 critic_loss=126608225280.0000 entropy=3.9576 ent_coef=0.001962 approx_kl=0.0005 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 4200] reward=-46424409.7 actor_loss=0.0813 critic_loss=122140531475.6923 entropy=3.9583 ent_coef=0.001962 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 4200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-330119.4 mean_steps=14.2
|
|
[Episode 4210] reward=-58302049.2 actor_loss=0.0909 critic_loss=125950206771.2000 entropy=3.9533 ent_coef=0.001962 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 4220] reward=-56898720.8 actor_loss=0.1189 critic_loss=127723256035.5556 entropy=3.9559 ent_coef=0.001962 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 4220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-554798.6 mean_steps=11.7
|
|
[Episode 4230] reward=-51020352.6 actor_loss=0.0942 critic_loss=120501766330.1818 entropy=3.9548 ent_coef=0.001962 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 4240] reward=-54254091.4 actor_loss=0.1215 critic_loss=123174383243.6364 entropy=3.9540 ent_coef=0.001962 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 4240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-518233.1 mean_steps=13.1
|
|
[Episode 4250] reward=-49569353.1 actor_loss=0.1074 critic_loss=120976867328.0000 entropy=3.9537 ent_coef=0.001962 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 4260] reward=-55680052.9 actor_loss=0.0972 critic_loss=125475773833.8462 entropy=3.9536 ent_coef=0.001962 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 4260] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475882.9 mean_steps=13.8
|
|
[Episode 4270] reward=-59292809.6 actor_loss=0.0994 critic_loss=129511766698.6667 entropy=3.9508 ent_coef=0.001962 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 4280] reward=-51600412.3 actor_loss=0.1152 critic_loss=120023012147.2000 entropy=3.9468 ent_coef=0.001961 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 4280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437588.1 mean_steps=13.4
|
|
[Episode 4290] reward=-51062346.8 actor_loss=0.1051 critic_loss=124013976780.8000 entropy=3.9479 ent_coef=0.001961 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 4300] reward=-56518100.1 actor_loss=0.0969 critic_loss=125476462592.0000 entropy=3.9401 ent_coef=0.001961 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 4300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-625280.5 mean_steps=12.5
|
|
[Episode 4310] reward=-59215527.0 actor_loss=0.1317 critic_loss=129180722517.3333 entropy=3.9388 ent_coef=0.001961 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 4320] reward=-48969623.9 actor_loss=0.1006 critic_loss=124578313216.0000 entropy=3.9374 ent_coef=0.001961 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 4320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441082.9 mean_steps=13.4
|
|
[Episode 4330] reward=-53022262.6 actor_loss=0.1112 critic_loss=126061164251.4286 entropy=3.9354 ent_coef=0.001961 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 4340] reward=-55635207.8 actor_loss=0.1079 critic_loss=131058478788.9231 entropy=3.9327 ent_coef=0.001961 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 4340] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-594547.3 mean_steps=11.4
|
|
[Episode 4350] reward=-49421982.8 actor_loss=0.1098 critic_loss=123647814314.6667 entropy=3.9282 ent_coef=0.001961 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 4360] reward=-56716873.6 actor_loss=0.1020 critic_loss=125397820757.3333 entropy=3.9287 ent_coef=0.001961 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 4360] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-371720.6 mean_steps=15.3
|
|
[Episode 4370] reward=-58145133.2 actor_loss=0.1137 critic_loss=127386289948.4444 entropy=3.9300 ent_coef=0.001961 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 4380] reward=-70572617.6 actor_loss=0.0880 critic_loss=138707994214.4000 entropy=3.9273 ent_coef=0.001961 approx_kl=0.0020 kl_stop=1 intervention_rate=0.1009 front_blocked=0
|
|
[Eval 4380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542174.5 mean_steps=12.7
|
|
[Episode 4390] reward=-51242006.3 actor_loss=0.1082 critic_loss=127266685610.6667 entropy=3.9290 ent_coef=0.001960 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 4400] reward=-46770183.3 actor_loss=0.1399 critic_loss=118105165238.8571 entropy=3.9248 ent_coef=0.001960 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 4400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-383491.4 mean_steps=13.9
|
|
[Episode 4410] reward=-48016708.9 actor_loss=0.1321 critic_loss=121413126616.6154 entropy=3.9278 ent_coef=0.001960 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 4420] reward=-55233762.9 actor_loss=0.1208 critic_loss=125306605940.3636 entropy=3.9283 ent_coef=0.001960 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 4420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-545740.1 mean_steps=11.7
|
|
[Episode 4430] reward=-45182329.8 actor_loss=0.1061 critic_loss=123056201045.3333 entropy=3.9265 ent_coef=0.001960 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 4440] reward=-48110043.9 actor_loss=0.1061 critic_loss=122177015352.8889 entropy=3.9230 ent_coef=0.001960 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 4440] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-717963.3 mean_steps=10.7
|
|
[Episode 4450] reward=-44995852.9 actor_loss=0.1145 critic_loss=118687943338.6667 entropy=3.9217 ent_coef=0.001960 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 4460] reward=-47644494.6 actor_loss=0.1071 critic_loss=120648672051.2000 entropy=3.9236 ent_coef=0.001960 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 4460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-451793.0 mean_steps=12.8
|
|
[Episode 4470] reward=-51938044.8 actor_loss=0.1293 critic_loss=121062542848.0000 entropy=3.9230 ent_coef=0.001960 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 4480] reward=-42873324.3 actor_loss=0.0753 critic_loss=122004032716.8000 entropy=3.9214 ent_coef=0.001960 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 4480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-492704.5 mean_steps=12.6
|
|
[Episode 4490] reward=-55777681.7 actor_loss=0.1198 critic_loss=131352571448.8889 entropy=3.9203 ent_coef=0.001960 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 4500] reward=-55422574.1 actor_loss=0.1197 critic_loss=129365836800.0000 entropy=3.9169 ent_coef=0.001960 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 4500] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-390881.9 mean_steps=13.9
|
|
[Episode 4510] reward=-66761972.1 actor_loss=0.0961 critic_loss=132788447074.4615 entropy=3.9148 ent_coef=0.001959 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 4520] reward=-49554994.0 actor_loss=0.1040 critic_loss=123313512448.0000 entropy=3.9130 ent_coef=0.001959 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 4520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-325795.4 mean_steps=14.5
|
|
[Episode 4530] reward=-55762867.6 actor_loss=0.1189 critic_loss=125712559755.6364 entropy=3.9173 ent_coef=0.001959 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 4540] reward=-56589098.9 actor_loss=0.0889 critic_loss=130356757504.0000 entropy=3.9175 ent_coef=0.001959 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 4540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-427012.3 mean_steps=13.3
|
|
[Episode 4550] reward=-54333964.0 actor_loss=0.1222 critic_loss=122513421653.3333 entropy=3.9193 ent_coef=0.001959 approx_kl=0.0006 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 4560] reward=-65903165.7 actor_loss=0.1212 critic_loss=130595551232.0000 entropy=3.9178 ent_coef=0.001959 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 4560] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-528677.5 mean_steps=11.1
|
|
[Episode 4570] reward=-60662925.7 actor_loss=0.1190 critic_loss=132107611340.8000 entropy=3.9175 ent_coef=0.001959 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 4580] reward=-53562824.5 actor_loss=0.0818 critic_loss=124150361526.8571 entropy=3.9172 ent_coef=0.001959 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 4580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445476.4 mean_steps=13.8
|
|
[Episode 4590] reward=-45635357.7 actor_loss=0.1042 critic_loss=119446653610.6667 entropy=3.9152 ent_coef=0.001959 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 4600] reward=-48361393.9 actor_loss=0.1032 critic_loss=126715137137.7778 entropy=3.9093 ent_coef=0.001959 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 4600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519041.0 mean_steps=13.3
|
|
[Episode 4610] reward=-44223245.6 actor_loss=0.0876 critic_loss=123503414418.2857 entropy=3.9076 ent_coef=0.001959 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 4620] reward=-45121243.1 actor_loss=0.1224 critic_loss=122629059584.0000 entropy=3.9056 ent_coef=0.001958 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 4620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491545.2 mean_steps=12.9
|
|
[Episode 4630] reward=-56188846.0 actor_loss=0.1015 critic_loss=132100349366.8571 entropy=3.9055 ent_coef=0.001958 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 4640] reward=-55512843.1 actor_loss=0.1129 critic_loss=126149872025.6000 entropy=3.9050 ent_coef=0.001958 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 4640] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572055.4 mean_steps=12.0
|
|
[Episode 4650] reward=-50647477.2 actor_loss=0.1382 critic_loss=122890893312.0000 entropy=3.9054 ent_coef=0.001958 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 4660] reward=-50507671.6 actor_loss=0.1146 critic_loss=124993454665.1429 entropy=3.9062 ent_coef=0.001958 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 4660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-453572.7 mean_steps=14.3
|
|
[Episode 4670] reward=-58850707.8 actor_loss=0.1407 critic_loss=125339762688.0000 entropy=3.9037 ent_coef=0.001958 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 4680] reward=-47859723.3 actor_loss=0.1163 critic_loss=123844286054.4000 entropy=3.9056 ent_coef=0.001958 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 4680] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-262369.1 mean_steps=15.7
|
|
[Episode 4690] reward=-53903236.8 actor_loss=0.0977 critic_loss=125047835989.3333 entropy=3.9014 ent_coef=0.001958 approx_kl=0.0004 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 4700] reward=-49683027.8 actor_loss=0.1081 critic_loss=119981329612.8000 entropy=3.8995 ent_coef=0.001958 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 4700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-616570.1 mean_steps=11.4
|
|
[Episode 4710] reward=-53164503.5 actor_loss=0.1497 critic_loss=125962417493.3333 entropy=3.8975 ent_coef=0.001958 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 4720] reward=-50271634.9 actor_loss=0.0856 critic_loss=121220212508.4444 entropy=3.8970 ent_coef=0.001958 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Eval 4720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542456.7 mean_steps=12.7
|
|
[Episode 4730] reward=-49080365.6 actor_loss=0.1013 critic_loss=122644963328.0000 entropy=3.8978 ent_coef=0.001957 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 4740] reward=-66250281.6 actor_loss=0.1178 critic_loss=131924812288.0000 entropy=3.8937 ent_coef=0.001957 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 4740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-370216.8 mean_steps=14.9
|
|
[Episode 4750] reward=-50630507.2 actor_loss=0.0993 critic_loss=119994840795.4286 entropy=3.8931 ent_coef=0.001957 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 4760] reward=-48149007.7 actor_loss=0.1315 critic_loss=120549395456.0000 entropy=3.8962 ent_coef=0.001957 approx_kl=0.0010 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 4760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-590539.2 mean_steps=12.5
|
|
[Episode 4770] reward=-41086937.3 actor_loss=0.1129 critic_loss=118850258066.2857 entropy=3.8923 ent_coef=0.001957 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 4780] reward=-43186205.3 actor_loss=0.1230 critic_loss=118906940074.6667 entropy=3.8916 ent_coef=0.001957 approx_kl=0.0008 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 4780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454421.7 mean_steps=13.4
|
|
[Episode 4790] reward=-51568360.3 actor_loss=0.0922 critic_loss=125459883804.4444 entropy=3.8922 ent_coef=0.001957 approx_kl=0.0011 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 4800] reward=-47449087.3 actor_loss=0.1089 critic_loss=124058344561.7778 entropy=3.8897 ent_coef=0.001957 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 4800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431289.8 mean_steps=14.6
|
|
[Episode 4810] reward=-58967615.2 actor_loss=0.1228 critic_loss=127447867392.0000 entropy=3.8867 ent_coef=0.001957 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 4820] reward=-52525653.2 actor_loss=0.1098 critic_loss=125889978368.0000 entropy=3.8839 ent_coef=0.001957 approx_kl=0.0000 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 4820] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-358836.5 mean_steps=15.5
|
|
[Episode 4830] reward=-65257806.4 actor_loss=0.1143 critic_loss=135176196551.1111 entropy=3.8816 ent_coef=0.001957 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 4840] reward=-45681518.1 actor_loss=0.1031 critic_loss=122292382626.9091 entropy=3.8761 ent_coef=0.001956 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 4840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-358618.2 mean_steps=14.6
|
|
[Episode 4850] reward=-49829417.4 actor_loss=0.1076 critic_loss=123404004010.6667 entropy=3.8768 ent_coef=0.001956 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 4860] reward=-51827015.6 actor_loss=0.1182 critic_loss=124093705216.0000 entropy=3.8743 ent_coef=0.001956 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 4860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-581268.6 mean_steps=11.9
|
|
[Episode 4870] reward=-53781240.5 actor_loss=0.1036 critic_loss=131362623115.6364 entropy=3.8727 ent_coef=0.001956 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 4880] reward=-54887678.9 actor_loss=0.0841 critic_loss=130292896689.2308 entropy=3.8715 ent_coef=0.001956 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 4880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451941.7 mean_steps=13.9
|
|
[Episode 4890] reward=-53905561.0 actor_loss=0.1064 critic_loss=125793629525.3333 entropy=3.8726 ent_coef=0.001956 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 4900] reward=-52522983.4 actor_loss=0.1143 critic_loss=120720935321.6000 entropy=3.8749 ent_coef=0.001956 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 4900] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-641666.6 mean_steps=12.1
|
|
[Episode 4910] reward=-58236224.8 actor_loss=0.1079 critic_loss=128691281920.0000 entropy=3.8789 ent_coef=0.001956 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 4920] reward=-48720802.2 actor_loss=0.1101 critic_loss=120195206348.8000 entropy=3.8777 ent_coef=0.001956 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 4920] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-577736.8 mean_steps=11.2
|
|
[Episode 4930] reward=-52982457.8 actor_loss=0.1003 critic_loss=119031321693.0909 entropy=3.8770 ent_coef=0.001956 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 4940] reward=-58487802.8 actor_loss=0.1099 critic_loss=128964450167.4667 entropy=3.8736 ent_coef=0.001956 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 4940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-416978.7 mean_steps=13.5
|
|
[Episode 4950] reward=-52881271.7 actor_loss=0.1221 critic_loss=122544716458.6667 entropy=3.8745 ent_coef=0.001955 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 4960] reward=-48053261.6 actor_loss=0.1022 critic_loss=122634076160.0000 entropy=3.8710 ent_coef=0.001955 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 4960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-540924.1 mean_steps=12.2
|
|
[Episode 4970] reward=-62579748.0 actor_loss=0.1305 critic_loss=127313984170.6667 entropy=3.8684 ent_coef=0.001955 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0957 front_blocked=0
|
|
[Episode 4980] reward=-56498947.1 actor_loss=0.0850 critic_loss=127105827635.2000 entropy=3.8670 ent_coef=0.001955 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 4980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-496425.7 mean_steps=13.8
|
|
[Episode 4990] reward=-51625895.1 actor_loss=0.1219 critic_loss=124214040439.4667 entropy=3.8659 ent_coef=0.001955 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 5000] reward=-51597222.6 actor_loss=0.1017 critic_loss=123842370218.6667 entropy=3.8661 ent_coef=0.001955 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 5000] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-309348.6 mean_steps=15.2
|
|
[Episode 5010] reward=-42295182.7 actor_loss=0.1084 critic_loss=117607977545.1429 entropy=3.8700 ent_coef=0.001955 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 5020] reward=-66580855.5 actor_loss=0.1345 critic_loss=135294022997.3333 entropy=3.8697 ent_coef=0.001955 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 5020] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-668671.0 mean_steps=11.2
|
|
[Episode 5030] reward=-60402870.1 actor_loss=0.1115 critic_loss=131981471744.0000 entropy=3.8696 ent_coef=0.001955 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 5040] reward=-50048344.6 actor_loss=0.1145 critic_loss=124659923502.5455 entropy=3.8659 ent_coef=0.001955 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 5040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524396.3 mean_steps=13.3
|
|
[Episode 5050] reward=-58020602.4 actor_loss=0.1411 critic_loss=126156621531.4286 entropy=3.8671 ent_coef=0.001955 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 5060] reward=-50845072.4 actor_loss=0.1182 critic_loss=121303519547.0769 entropy=3.8653 ent_coef=0.001954 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 5060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-520556.5 mean_steps=12.6
|
|
[Episode 5070] reward=-57282796.6 actor_loss=0.1238 critic_loss=130108066084.5714 entropy=3.8628 ent_coef=0.001954 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 5080] reward=-46999911.4 actor_loss=0.1028 critic_loss=124126034797.7143 entropy=3.8632 ent_coef=0.001954 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 5080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-476043.2 mean_steps=12.2
|
|
[Episode 5090] reward=-54293188.4 actor_loss=0.1096 critic_loss=128597309030.4000 entropy=3.8617 ent_coef=0.001954 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 5100] reward=-61452160.8 actor_loss=0.1219 critic_loss=132810306653.0909 entropy=3.8612 ent_coef=0.001954 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 5100] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-323282.0 mean_steps=14.8
|
|
[Episode 5110] reward=-50074643.8 actor_loss=0.1203 critic_loss=124889356288.0000 entropy=3.8655 ent_coef=0.001954 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 5120] reward=-55329402.0 actor_loss=0.0999 critic_loss=124800872448.0000 entropy=3.8651 ent_coef=0.001954 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 5120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386879.0 mean_steps=14.8
|
|
[Episode 5130] reward=-48151937.4 actor_loss=0.1024 critic_loss=120754772377.6000 entropy=3.8645 ent_coef=0.001954 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 5140] reward=-60539388.2 actor_loss=0.1128 critic_loss=130743401676.8000 entropy=3.8668 ent_coef=0.001954 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 5140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-382870.8 mean_steps=14.8
|
|
[Episode 5150] reward=-47014461.6 actor_loss=0.1252 critic_loss=121936543744.0000 entropy=3.8655 ent_coef=0.001954 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 5160] reward=-43599377.6 actor_loss=0.0916 critic_loss=117139068245.3333 entropy=3.8654 ent_coef=0.001954 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 5160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537207.8 mean_steps=12.7
|
|
[Episode 5170] reward=-57361148.7 actor_loss=0.1228 critic_loss=128392547328.0000 entropy=3.8663 ent_coef=0.001953 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 5180] reward=-61497648.5 actor_loss=0.1157 critic_loss=129636996983.4667 entropy=3.8665 ent_coef=0.001953 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 5180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-531182.6 mean_steps=12.7
|
|
[Episode 5190] reward=-52068571.0 actor_loss=0.1239 critic_loss=127675010486.8571 entropy=3.8668 ent_coef=0.001953 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 5200] reward=-60858707.8 actor_loss=0.1341 critic_loss=131174956032.0000 entropy=3.8681 ent_coef=0.001953 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 5200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-440049.2 mean_steps=14.7
|
|
[Episode 5210] reward=-50753823.6 actor_loss=0.1192 critic_loss=122370725701.8182 entropy=3.8651 ent_coef=0.001953 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 5220] reward=-51730319.6 actor_loss=0.1072 critic_loss=122572604136.7273 entropy=3.8595 ent_coef=0.001953 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 5220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-488568.1 mean_steps=13.3
|
|
[Episode 5230] reward=-50537197.5 actor_loss=0.1107 critic_loss=121275283163.4286 entropy=3.8575 ent_coef=0.001953 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 5240] reward=-42206197.8 actor_loss=0.1010 critic_loss=122260605952.0000 entropy=3.8572 ent_coef=0.001953 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 5240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540067.8 mean_steps=12.8
|
|
[Episode 5250] reward=-46832727.9 actor_loss=0.1208 critic_loss=122305971086.2222 entropy=3.8577 ent_coef=0.001953 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 5260] reward=-57167995.1 actor_loss=0.1336 critic_loss=127616021065.1429 entropy=3.8587 ent_coef=0.001953 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 5260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-451516.0 mean_steps=14.4
|
|
[Episode 5270] reward=-46822664.8 actor_loss=0.1117 critic_loss=119246897152.0000 entropy=3.8606 ent_coef=0.001953 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 5280] reward=-57476281.0 actor_loss=0.1200 critic_loss=130106472605.5385 entropy=3.8575 ent_coef=0.001952 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 5280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-285717.2 mean_steps=14.1
|
|
[Episode 5290] reward=-44710234.9 actor_loss=0.1086 critic_loss=123084475830.8571 entropy=3.8579 ent_coef=0.001952 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 5300] reward=-54911783.4 actor_loss=0.1164 critic_loss=127943613644.8000 entropy=3.8538 ent_coef=0.001952 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 5300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-430326.2 mean_steps=14.3
|
|
[Episode 5310] reward=-52740272.8 actor_loss=0.1192 critic_loss=126886868309.3333 entropy=3.8526 ent_coef=0.001952 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 5320] reward=-44748104.5 actor_loss=0.1108 critic_loss=120628446094.2222 entropy=3.8531 ent_coef=0.001952 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 5320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529804.5 mean_steps=12.4
|
|
[Episode 5330] reward=-54192710.2 actor_loss=0.1200 critic_loss=126537945403.0769 entropy=3.8563 ent_coef=0.001952 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 5340] reward=-52366451.2 actor_loss=0.1025 critic_loss=126149253997.7143 entropy=3.8568 ent_coef=0.001952 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 5340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-567751.7 mean_steps=11.9
|
|
[Episode 5350] reward=-51757720.7 actor_loss=0.1195 critic_loss=121998376960.0000 entropy=3.8588 ent_coef=0.001952 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 5360] reward=-59493993.6 actor_loss=0.1212 critic_loss=127674181924.5714 entropy=3.8620 ent_coef=0.001952 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 5360] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-307882.4 mean_steps=15.8
|
|
[Episode 5370] reward=-49911004.0 actor_loss=0.0909 critic_loss=123092995150.7692 entropy=3.8620 ent_coef=0.001952 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 5380] reward=-49969252.9 actor_loss=0.0782 critic_loss=119975956480.0000 entropy=3.8634 ent_coef=0.001952 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 5380] success_rate=0.750 qp_infeasible_rate=0.250 mean_return=-145824.2 mean_steps=17.1
|
|
[Episode 5390] reward=-58723895.4 actor_loss=0.1070 critic_loss=128805766565.6471 entropy=3.8623 ent_coef=0.001951 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 5400] reward=-58472600.3 actor_loss=0.1330 critic_loss=131355266252.8000 entropy=3.8621 ent_coef=0.001951 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 5400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-472255.8 mean_steps=13.2
|
|
[Episode 5410] reward=-64527756.1 actor_loss=0.1421 critic_loss=130232475648.0000 entropy=3.8644 ent_coef=0.001951 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Episode 5420] reward=-50865665.7 actor_loss=0.0866 critic_loss=123359895552.0000 entropy=3.8663 ent_coef=0.001951 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 5420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-430069.8 mean_steps=12.7
|
|
[Episode 5430] reward=-52565189.3 actor_loss=0.1296 critic_loss=127027067562.6667 entropy=3.8630 ent_coef=0.001951 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 5440] reward=-53085085.3 actor_loss=0.1229 critic_loss=124371605367.4667 entropy=3.8665 ent_coef=0.001951 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 5440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-368895.0 mean_steps=13.7
|
|
[Episode 5450] reward=-56831610.7 actor_loss=0.1129 critic_loss=126748641689.6000 entropy=3.8630 ent_coef=0.001951 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 5460] reward=-48435551.0 actor_loss=0.1243 critic_loss=124365000704.0000 entropy=3.8651 ent_coef=0.001951 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 5460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-359388.1 mean_steps=14.6
|
|
[Episode 5470] reward=-58731044.1 actor_loss=0.1209 critic_loss=124774719488.0000 entropy=3.8666 ent_coef=0.001951 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Episode 5480] reward=-48953542.6 actor_loss=0.1030 critic_loss=123405882163.2000 entropy=3.8639 ent_coef=0.001951 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 5480] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-361726.2 mean_steps=15.2
|
|
[Episode 5490] reward=-58792043.4 actor_loss=0.1017 critic_loss=126098132406.8571 entropy=3.8630 ent_coef=0.001951 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 5500] reward=-56417404.2 actor_loss=0.1078 critic_loss=124879476549.8182 entropy=3.8628 ent_coef=0.001951 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 5500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-447845.5 mean_steps=13.5
|
|
[Episode 5510] reward=-59884253.2 actor_loss=0.1105 critic_loss=125781613772.8000 entropy=3.8555 ent_coef=0.001950 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 5520] reward=-60499740.1 actor_loss=0.1305 critic_loss=127998656512.0000 entropy=3.8529 ent_coef=0.001950 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 5520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-457582.6 mean_steps=12.9
|
|
[Episode 5530] reward=-53263547.8 actor_loss=0.1159 critic_loss=123241477632.0000 entropy=3.8497 ent_coef=0.001950 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 5540] reward=-65245937.5 actor_loss=0.0924 critic_loss=131394729574.4000 entropy=3.8500 ent_coef=0.001950 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 5540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428942.2 mean_steps=14.2
|
|
[Episode 5550] reward=-60399582.6 actor_loss=0.1038 critic_loss=130551246848.0000 entropy=3.8489 ent_coef=0.001950 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Episode 5560] reward=-51726540.5 actor_loss=0.0937 critic_loss=123265858413.7143 entropy=3.8511 ent_coef=0.001950 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 5560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-482158.3 mean_steps=12.9
|
|
[Episode 5570] reward=-63816785.8 actor_loss=0.1231 critic_loss=127744609484.8000 entropy=3.8472 ent_coef=0.001950 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 5580] reward=-51123550.0 actor_loss=0.1230 critic_loss=123289936502.1538 entropy=3.8449 ent_coef=0.001950 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 5580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-394861.9 mean_steps=14.2
|
|
[Episode 5590] reward=-45671535.0 actor_loss=0.1124 critic_loss=120138888806.4000 entropy=3.8367 ent_coef=0.001950 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 5600] reward=-45094066.3 actor_loss=0.0995 critic_loss=121607987200.0000 entropy=3.8363 ent_coef=0.001950 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 5600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459198.2 mean_steps=14.3
|
|
[Episode 5610] reward=-49350629.7 actor_loss=0.0927 critic_loss=122850045440.0000 entropy=3.8346 ent_coef=0.001950 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 5620] reward=-54295644.5 actor_loss=0.1105 critic_loss=128851374080.0000 entropy=3.8351 ent_coef=0.001949 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 5620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501230.0 mean_steps=13.3
|
|
[Episode 5630] reward=-60660470.5 actor_loss=0.1192 critic_loss=128999669760.0000 entropy=3.8370 ent_coef=0.001949 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 5640] reward=-38778852.5 actor_loss=0.0844 critic_loss=118285157612.3077 entropy=3.8369 ent_coef=0.001949 approx_kl=0.0011 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Eval 5640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-502816.5 mean_steps=12.4
|
|
[Episode 5650] reward=-55186022.1 actor_loss=0.1150 critic_loss=125803957816.8889 entropy=3.8365 ent_coef=0.001949 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 5660] reward=-50247181.0 actor_loss=0.0985 critic_loss=123255053994.6667 entropy=3.8334 ent_coef=0.001949 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 5660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499742.5 mean_steps=13.4
|
|
[Episode 5670] reward=-56538913.7 actor_loss=0.0999 critic_loss=128786796916.3636 entropy=3.8336 ent_coef=0.001949 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 5680] reward=-48522261.1 actor_loss=0.1126 critic_loss=125409119670.8571 entropy=3.8257 ent_coef=0.001949 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 5680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-483805.6 mean_steps=13.8
|
|
[Episode 5690] reward=-60317652.7 actor_loss=0.0905 critic_loss=129912406016.0000 entropy=3.8225 ent_coef=0.001949 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 5700] reward=-61829989.2 actor_loss=0.1329 critic_loss=130705299865.6000 entropy=3.8213 ent_coef=0.001949 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 5700] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-280217.7 mean_steps=14.9
|
|
[Episode 5710] reward=-52180839.6 actor_loss=0.0762 critic_loss=126383894528.0000 entropy=3.8183 ent_coef=0.001949 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 5720] reward=-56108631.4 actor_loss=0.1171 critic_loss=126062212189.0909 entropy=3.8162 ent_coef=0.001949 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 5720] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-375377.3 mean_steps=15.7
|
|
[Episode 5730] reward=-59568716.3 actor_loss=0.1212 critic_loss=129838807267.5556 entropy=3.8156 ent_coef=0.001948 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 5740] reward=-60175050.6 actor_loss=0.1334 critic_loss=129446785609.1429 entropy=3.8159 ent_coef=0.001948 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Eval 5740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454530.7 mean_steps=13.8
|
|
[Episode 5750] reward=-52283716.2 actor_loss=0.1072 critic_loss=123994996224.0000 entropy=3.8129 ent_coef=0.001948 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 5760] reward=-52141695.9 actor_loss=0.1077 critic_loss=125856670440.7273 entropy=3.8104 ent_coef=0.001948 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 5760] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-520221.1 mean_steps=11.6
|
|
[Episode 5770] reward=-53056063.3 actor_loss=0.0911 critic_loss=125292298984.7273 entropy=3.8079 ent_coef=0.001948 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 5780] reward=-46202819.1 actor_loss=0.0870 critic_loss=123786710667.6364 entropy=3.8091 ent_coef=0.001948 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 5780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-398148.1 mean_steps=13.4
|
|
[Episode 5790] reward=-51423862.9 actor_loss=0.1217 critic_loss=122323135341.7143 entropy=3.8089 ent_coef=0.001948 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 5800] reward=-65599137.8 actor_loss=0.0854 critic_loss=131776888832.0000 entropy=3.8110 ent_coef=0.001948 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 5800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-567558.3 mean_steps=12.6
|
|
[Episode 5810] reward=-58656931.0 actor_loss=0.1438 critic_loss=128303987712.0000 entropy=3.8089 ent_coef=0.001948 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 5820] reward=-48897507.2 actor_loss=0.1245 critic_loss=119767896064.0000 entropy=3.8104 ent_coef=0.001948 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 5820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-550195.5 mean_steps=13.3
|
|
[Episode 5830] reward=-52604119.0 actor_loss=0.1176 critic_loss=122333643776.0000 entropy=3.8106 ent_coef=0.001948 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 5840] reward=-57006657.8 actor_loss=0.1141 critic_loss=128219435463.1111 entropy=3.8132 ent_coef=0.001947 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 5840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469916.0 mean_steps=12.9
|
|
[Episode 5850] reward=-55497761.7 actor_loss=0.0935 critic_loss=123884711936.0000 entropy=3.8109 ent_coef=0.001947 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 5860] reward=-54523126.6 actor_loss=0.1182 critic_loss=124248637440.0000 entropy=3.8120 ent_coef=0.001947 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 5860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-595617.5 mean_steps=12.2
|
|
[Episode 5870] reward=-51269094.1 actor_loss=0.1144 critic_loss=119613450752.0000 entropy=3.8096 ent_coef=0.001947 approx_kl=0.0002 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 5880] reward=-57847761.3 actor_loss=0.1281 critic_loss=130787125020.4444 entropy=3.8101 ent_coef=0.001947 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 5880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-525200.4 mean_steps=12.6
|
|
[Episode 5890] reward=-52345718.8 actor_loss=0.1194 critic_loss=123693696341.3333 entropy=3.8084 ent_coef=0.001947 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 5900] reward=-52081911.6 actor_loss=0.1213 critic_loss=125589842329.6000 entropy=3.8073 ent_coef=0.001947 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 5900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461990.1 mean_steps=14.0
|
|
[Episode 5910] reward=-53668786.6 actor_loss=0.1166 critic_loss=122947381248.0000 entropy=3.8070 ent_coef=0.001947 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 5920] reward=-50625962.1 actor_loss=0.1185 critic_loss=121209239552.0000 entropy=3.8086 ent_coef=0.001947 approx_kl=-0.0003 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 5920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-404474.5 mean_steps=15.1
|
|
[Episode 5930] reward=-44963672.4 actor_loss=0.1047 critic_loss=118263156736.0000 entropy=3.8069 ent_coef=0.001947 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 5940] reward=-50012405.3 actor_loss=0.1138 critic_loss=123468590421.3333 entropy=3.8078 ent_coef=0.001947 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 5940] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-327442.8 mean_steps=15.3
|
|
[Episode 5950] reward=-67690971.4 actor_loss=0.1234 critic_loss=132652173154.4615 entropy=3.8082 ent_coef=0.001946 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Episode 5960] reward=-48926351.2 actor_loss=0.1046 critic_loss=119834329600.0000 entropy=3.8028 ent_coef=0.001946 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 5960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-380877.0 mean_steps=15.1
|
|
[Episode 5970] reward=-56589842.2 actor_loss=0.1181 critic_loss=127384626972.4444 entropy=3.7990 ent_coef=0.001946 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 5980] reward=-45692706.1 actor_loss=0.0827 critic_loss=118188436138.6667 entropy=3.7981 ent_coef=0.001946 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 5980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-413677.0 mean_steps=12.5
|
|
[Episode 5990] reward=-49385972.3 actor_loss=0.1111 critic_loss=124628365767.1111 entropy=3.7957 ent_coef=0.001946 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 6000] reward=-46072148.9 actor_loss=0.0950 critic_loss=118422845147.4286 entropy=3.7964 ent_coef=0.001946 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Eval 6000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526229.0 mean_steps=12.6
|
|
[Episode 6010] reward=-58159279.1 actor_loss=0.0998 critic_loss=127742981461.3333 entropy=3.7948 ent_coef=0.001946 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 6020] reward=-49893128.5 actor_loss=0.1262 critic_loss=124756449093.8182 entropy=3.7941 ent_coef=0.001946 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 6020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-442076.3 mean_steps=13.7
|
|
[Episode 6030] reward=-54773917.6 actor_loss=0.0845 critic_loss=127282054758.4000 entropy=3.7958 ent_coef=0.001946 approx_kl=0.0011 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 6040] reward=-44930094.2 actor_loss=0.0992 critic_loss=118228424021.3333 entropy=3.7969 ent_coef=0.001946 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 6040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-510261.3 mean_steps=12.6
|
|
[Episode 6050] reward=-54712197.9 actor_loss=0.0971 critic_loss=125386857813.3333 entropy=3.7972 ent_coef=0.001946 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 6060] reward=-56839666.1 actor_loss=0.1240 critic_loss=129793201152.0000 entropy=3.7954 ent_coef=0.001945 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 6060] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-539579.0 mean_steps=10.5
|
|
[Episode 6070] reward=-63759057.7 actor_loss=0.1013 critic_loss=134723571712.0000 entropy=3.7955 ent_coef=0.001945 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 6080] reward=-49608507.5 actor_loss=0.1363 critic_loss=120986021614.9333 entropy=3.7931 ent_coef=0.001945 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 6080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-391962.7 mean_steps=14.1
|
|
[Episode 6090] reward=-59309888.1 actor_loss=0.0998 critic_loss=128170119168.0000 entropy=3.7878 ent_coef=0.001945 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 6100] reward=-70063599.8 actor_loss=0.1174 critic_loss=136435375104.0000 entropy=3.7888 ent_coef=0.001945 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 6100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-341217.3 mean_steps=14.8
|
|
[Episode 6110] reward=-56399971.3 actor_loss=0.1382 critic_loss=125451077222.4000 entropy=3.7887 ent_coef=0.001945 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 6120] reward=-59807534.1 actor_loss=0.1353 critic_loss=127044015941.8182 entropy=3.7899 ent_coef=0.001945 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 6120] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-364804.1 mean_steps=14.9
|
|
[Episode 6130] reward=-57340012.9 actor_loss=0.1501 critic_loss=128764599149.7143 entropy=3.7888 ent_coef=0.001945 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 6140] reward=-40202964.6 actor_loss=0.0801 critic_loss=116106913564.4444 entropy=3.7829 ent_coef=0.001945 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 6140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-429279.5 mean_steps=13.4
|
|
[Episode 6150] reward=-55540917.7 actor_loss=0.1031 critic_loss=128007293858.9091 entropy=3.7791 ent_coef=0.001945 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 6160] reward=-58673495.6 actor_loss=0.1444 critic_loss=127636921457.7778 entropy=3.7823 ent_coef=0.001945 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 6160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-436041.2 mean_steps=12.8
|
|
[Episode 6170] reward=-43800760.0 actor_loss=0.0942 critic_loss=119444378055.1111 entropy=3.7841 ent_coef=0.001944 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 6180] reward=-52884536.0 actor_loss=0.1101 critic_loss=123547680023.2727 entropy=3.7835 ent_coef=0.001944 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 6180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434718.0 mean_steps=13.6
|
|
[Episode 6190] reward=-55392330.6 actor_loss=0.1233 critic_loss=126112101074.8235 entropy=3.7830 ent_coef=0.001944 approx_kl=-0.0001 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 6200] reward=-56496231.7 actor_loss=0.1223 critic_loss=129679520563.2000 entropy=3.7790 ent_coef=0.001944 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 6200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481161.8 mean_steps=12.8
|
|
[Episode 6210] reward=-48699068.5 actor_loss=0.1140 critic_loss=123890232971.6364 entropy=3.7771 ent_coef=0.001944 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 6220] reward=-54372061.7 actor_loss=0.1305 critic_loss=126560613717.3333 entropy=3.7765 ent_coef=0.001944 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 6220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457374.2 mean_steps=13.7
|
|
[Episode 6230] reward=-46243916.8 actor_loss=0.1277 critic_loss=121809980620.8000 entropy=3.7785 ent_coef=0.001944 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 6240] reward=-57085355.0 actor_loss=0.1259 critic_loss=130095693824.0000 entropy=3.7801 ent_coef=0.001944 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 6240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-424193.8 mean_steps=14.4
|
|
[Episode 6250] reward=-47636305.6 actor_loss=0.1131 critic_loss=118681343638.5882 entropy=3.7780 ent_coef=0.001944 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 6260] reward=-58586865.1 actor_loss=0.1116 critic_loss=126324031488.0000 entropy=3.7746 ent_coef=0.001944 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 6260] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-408089.0 mean_steps=14.9
|
|
[Episode 6270] reward=-53386128.8 actor_loss=0.0764 critic_loss=124500779008.0000 entropy=3.7773 ent_coef=0.001944 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 6280] reward=-58875080.1 actor_loss=0.1057 critic_loss=125435931949.1765 entropy=3.7757 ent_coef=0.001943 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 6280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-400004.0 mean_steps=14.2
|
|
[Episode 6290] reward=-57528289.5 actor_loss=0.1096 critic_loss=129288091355.4286 entropy=3.7707 ent_coef=0.001943 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 6300] reward=-59981784.0 actor_loss=0.1317 critic_loss=134328797866.6667 entropy=3.7706 ent_coef=0.001943 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 6300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-522159.8 mean_steps=12.6
|
|
[Episode 6310] reward=-51971665.5 actor_loss=0.1072 critic_loss=123635081960.7273 entropy=3.7685 ent_coef=0.001943 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 6320] reward=-49650954.1 actor_loss=0.0929 critic_loss=126549241856.0000 entropy=3.7671 ent_coef=0.001943 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 6320] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-304127.7 mean_steps=16.6
|
|
[Episode 6330] reward=-59260370.2 actor_loss=0.1214 critic_loss=128078443315.2000 entropy=3.7655 ent_coef=0.001943 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 6340] reward=-54564153.4 actor_loss=0.0748 critic_loss=123894034887.1111 entropy=3.7666 ent_coef=0.001943 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 6340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383349.5 mean_steps=14.8
|
|
[Episode 6350] reward=-47501515.5 actor_loss=0.1204 critic_loss=119385090340.5714 entropy=3.7649 ent_coef=0.001943 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 6360] reward=-42990930.4 actor_loss=0.1085 critic_loss=119931097403.0769 entropy=3.7603 ent_coef=0.001943 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 6360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-361511.3 mean_steps=14.8
|
|
[Episode 6370] reward=-47795154.4 actor_loss=0.1061 critic_loss=120916781986.9091 entropy=3.7580 ent_coef=0.001943 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 6380] reward=-45409789.3 actor_loss=0.1125 critic_loss=123362796339.2000 entropy=3.7617 ent_coef=0.001943 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 6380] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561889.7 mean_steps=12.9
|
|
[Episode 6390] reward=-53721702.3 actor_loss=0.1053 critic_loss=120553128755.2000 entropy=3.7649 ent_coef=0.001942 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 6400] reward=-62142813.1 actor_loss=0.0989 critic_loss=132075459584.0000 entropy=3.7673 ent_coef=0.001942 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 6400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-482291.8 mean_steps=13.2
|
|
[Episode 6410] reward=-52114561.1 actor_loss=0.1208 critic_loss=124517016722.2857 entropy=3.7657 ent_coef=0.001942 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 6420] reward=-52178892.5 actor_loss=0.1335 critic_loss=125014429013.3333 entropy=3.7614 ent_coef=0.001942 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 6420] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490285.5 mean_steps=13.1
|
|
[Episode 6430] reward=-57691211.8 actor_loss=0.1330 critic_loss=128464461277.8667 entropy=3.7647 ent_coef=0.001942 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 6440] reward=-48452971.4 actor_loss=0.0948 critic_loss=125374145142.1538 entropy=3.7631 ent_coef=0.001942 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 6440] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-385852.5 mean_steps=14.6
|
|
[Episode 6450] reward=-47053525.1 actor_loss=0.1211 critic_loss=119080279244.8000 entropy=3.7622 ent_coef=0.001942 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 6460] reward=-41055639.7 actor_loss=0.0949 critic_loss=118342227037.0909 entropy=3.7611 ent_coef=0.001942 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 6460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534815.2 mean_steps=12.8
|
|
[Episode 6470] reward=-58234883.2 actor_loss=0.1005 critic_loss=128269485056.0000 entropy=3.7572 ent_coef=0.001942 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 6480] reward=-48785298.2 actor_loss=0.1132 critic_loss=121063864027.4286 entropy=3.7572 ent_coef=0.001942 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 6480] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-606348.4 mean_steps=12.3
|
|
[Episode 6490] reward=-55776967.8 actor_loss=0.0985 critic_loss=127187373494.8571 entropy=3.7574 ent_coef=0.001942 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 6500] reward=-52524290.7 actor_loss=0.1294 critic_loss=122580742144.0000 entropy=3.7533 ent_coef=0.001942 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 6500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489363.2 mean_steps=12.9
|
|
[Episode 6510] reward=-49685282.8 actor_loss=0.1025 critic_loss=122033481728.0000 entropy=3.7491 ent_coef=0.001941 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 6520] reward=-51090126.3 actor_loss=0.1273 critic_loss=128021288667.4286 entropy=3.7478 ent_coef=0.001941 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 6520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-445935.4 mean_steps=13.4
|
|
[Episode 6530] reward=-56767919.5 actor_loss=0.1310 critic_loss=130793908224.0000 entropy=3.7429 ent_coef=0.001941 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 6540] reward=-54376919.7 actor_loss=0.1370 critic_loss=127953669324.8000 entropy=3.7440 ent_coef=0.001941 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 6540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491515.6 mean_steps=13.0
|
|
[Episode 6550] reward=-50975398.0 actor_loss=0.1097 critic_loss=119685038824.7273 entropy=3.7439 ent_coef=0.001941 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 6560] reward=-64392310.0 actor_loss=0.1307 critic_loss=132116656850.8235 entropy=3.7432 ent_coef=0.001941 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 6560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-378973.6 mean_steps=14.2
|
|
[Episode 6570] reward=-42697779.1 actor_loss=0.1016 critic_loss=116699324416.0000 entropy=3.7440 ent_coef=0.001941 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 6580] reward=-49508478.9 actor_loss=0.0618 critic_loss=123137197670.4000 entropy=3.7444 ent_coef=0.001941 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 6580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-444662.1 mean_steps=12.8
|
|
[Episode 6590] reward=-45883682.4 actor_loss=0.0981 critic_loss=121877987328.0000 entropy=3.7430 ent_coef=0.001941 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 6600] reward=-66366627.9 actor_loss=0.1522 critic_loss=136212433188.5714 entropy=3.7497 ent_coef=0.001941 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 6600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433231.6 mean_steps=13.4
|
|
[Episode 6610] reward=-44243877.6 actor_loss=0.1129 critic_loss=118585235046.4000 entropy=3.7448 ent_coef=0.001941 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 6620] reward=-39498574.3 actor_loss=0.1172 critic_loss=113598804650.6667 entropy=3.7451 ent_coef=0.001940 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 6620] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-294059.8 mean_steps=15.9
|
|
[Episode 6630] reward=-51550700.8 actor_loss=0.0833 critic_loss=120838140814.2222 entropy=3.7487 ent_coef=0.001940 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 6640] reward=-55891162.2 actor_loss=0.1164 critic_loss=123752897649.7778 entropy=3.7509 ent_coef=0.001940 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 6640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-517499.9 mean_steps=12.2
|
|
[Episode 6650] reward=-52486152.0 actor_loss=0.1315 critic_loss=127387096901.8182 entropy=3.7535 ent_coef=0.001940 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 6660] reward=-45687645.8 actor_loss=0.1230 critic_loss=119409232164.5714 entropy=3.7522 ent_coef=0.001940 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 6660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422215.0 mean_steps=14.2
|
|
[Episode 6670] reward=-52267088.9 actor_loss=0.1434 critic_loss=126078618072.6154 entropy=3.7515 ent_coef=0.001940 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 6680] reward=-61313794.7 actor_loss=0.1300 critic_loss=129620739072.0000 entropy=3.7550 ent_coef=0.001940 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 6680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556823.6 mean_steps=12.6
|
|
[Episode 6690] reward=-48504724.4 actor_loss=0.0838 critic_loss=122118043805.5385 entropy=3.7542 ent_coef=0.001940 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 6700] reward=-50924510.7 actor_loss=0.1050 critic_loss=125786469297.2308 entropy=3.7477 ent_coef=0.001940 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Eval 6700] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-249894.6 mean_steps=15.3
|
|
[Episode 6710] reward=-51841159.0 actor_loss=0.1068 critic_loss=121644180419.7647 entropy=3.7439 ent_coef=0.001940 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 6720] reward=-60841021.5 actor_loss=0.1417 critic_loss=128173643776.0000 entropy=3.7429 ent_coef=0.001940 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 6720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-589421.0 mean_steps=12.1
|
|
[Episode 6730] reward=-56420682.7 actor_loss=0.1093 critic_loss=129748850005.3333 entropy=3.7426 ent_coef=0.001939 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 6740] reward=-69907296.8 actor_loss=0.1375 critic_loss=133539708928.0000 entropy=3.7408 ent_coef=0.001939 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Eval 6740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-445908.9 mean_steps=11.8
|
|
[Episode 6750] reward=-69961479.8 actor_loss=0.1168 critic_loss=134313040281.6000 entropy=3.7370 ent_coef=0.001939 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0990 front_blocked=0
|
|
[Episode 6760] reward=-60392488.4 actor_loss=0.1155 critic_loss=129717967725.7143 entropy=3.7364 ent_coef=0.001939 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 6760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487256.7 mean_steps=13.1
|
|
[Episode 6770] reward=-53976811.2 actor_loss=0.1467 critic_loss=127181395558.4000 entropy=3.7344 ent_coef=0.001939 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 6780] reward=-57334098.2 actor_loss=0.1233 critic_loss=130803801088.0000 entropy=3.7364 ent_coef=0.001939 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 6780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507045.7 mean_steps=12.3
|
|
[Episode 6790] reward=-57820835.0 actor_loss=0.1085 critic_loss=126309880393.1429 entropy=3.7348 ent_coef=0.001939 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 6800] reward=-54127538.7 actor_loss=0.1277 critic_loss=125182012416.0000 entropy=3.7314 ent_coef=0.001939 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 6800] success_rate=0.050 qp_infeasible_rate=0.950 mean_return=-755012.3 mean_steps=10.1
|
|
[Episode 6810] reward=-39666322.8 actor_loss=0.1114 critic_loss=115283787776.0000 entropy=3.7302 ent_coef=0.001939 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 6820] reward=-52180670.6 actor_loss=0.1450 critic_loss=124205505740.8000 entropy=3.7296 ent_coef=0.001939 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 6820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-458877.0 mean_steps=14.3
|
|
[Episode 6830] reward=-51380138.9 actor_loss=0.1123 critic_loss=125610481371.4286 entropy=3.7269 ent_coef=0.001939 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 6840] reward=-41196776.4 actor_loss=0.1161 critic_loss=117996385075.2000 entropy=3.7290 ent_coef=0.001938 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 6840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480099.4 mean_steps=12.9
|
|
[Episode 6850] reward=-50490171.3 actor_loss=0.1027 critic_loss=122921375744.0000 entropy=3.7244 ent_coef=0.001938 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 6860] reward=-55651752.9 actor_loss=0.1025 critic_loss=123352890221.7143 entropy=3.7231 ent_coef=0.001938 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Eval 6860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-456644.8 mean_steps=13.8
|
|
[Episode 6870] reward=-47527069.8 actor_loss=0.1129 critic_loss=123142120243.2000 entropy=3.7210 ent_coef=0.001938 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 6880] reward=-53148653.5 actor_loss=0.0975 critic_loss=123693393361.4545 entropy=3.7197 ent_coef=0.001938 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 6880] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-607249.6 mean_steps=11.7
|
|
[Episode 6890] reward=-57250524.2 actor_loss=0.1430 critic_loss=122686308352.0000 entropy=3.7161 ent_coef=0.001938 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 6900] reward=-52448931.9 actor_loss=0.1321 critic_loss=122379273830.4000 entropy=3.7156 ent_coef=0.001938 approx_kl=0.0004 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 6900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-562393.0 mean_steps=12.1
|
|
[Episode 6910] reward=-55239300.5 actor_loss=0.1212 critic_loss=124615653376.0000 entropy=3.7159 ent_coef=0.001938 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 6920] reward=-46056607.6 actor_loss=0.1102 critic_loss=120474661319.1111 entropy=3.7189 ent_coef=0.001938 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 6920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-374955.4 mean_steps=14.8
|
|
[Episode 6930] reward=-55813323.3 actor_loss=0.1336 critic_loss=126929360709.8182 entropy=3.7192 ent_coef=0.001938 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 6940] reward=-55062737.1 actor_loss=0.1352 critic_loss=124454887424.0000 entropy=3.7189 ent_coef=0.001938 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 6940] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505557.5 mean_steps=13.1
|
|
[Episode 6950] reward=-54379446.1 actor_loss=0.1215 critic_loss=124297930524.4444 entropy=3.7149 ent_coef=0.001937 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 6960] reward=-59025550.2 actor_loss=0.1052 critic_loss=125914839527.6190 entropy=3.7119 ent_coef=0.001937 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 6960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457130.7 mean_steps=13.4
|
|
[Episode 6970] reward=-48560096.4 actor_loss=0.1275 critic_loss=119053917184.0000 entropy=3.7082 ent_coef=0.001937 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 6980] reward=-49822129.8 actor_loss=0.1005 critic_loss=121624166400.0000 entropy=3.7093 ent_coef=0.001937 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 6980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-419896.1 mean_steps=14.4
|
|
[Episode 6990] reward=-60724552.7 actor_loss=0.1251 critic_loss=128851163545.6000 entropy=3.7108 ent_coef=0.001937 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Episode 7000] reward=-67404900.6 actor_loss=0.1347 critic_loss=136989390438.4000 entropy=3.7096 ent_coef=0.001937 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 7000] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-618659.4 mean_steps=10.7
|
|
[Episode 7010] reward=-61018444.9 actor_loss=0.1077 critic_loss=131922599936.0000 entropy=3.7115 ent_coef=0.001937 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 7020] reward=-50113164.5 actor_loss=0.1124 critic_loss=121475315029.3333 entropy=3.7088 ent_coef=0.001937 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 7020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-534075.5 mean_steps=11.2
|
|
[Episode 7030] reward=-56015941.0 actor_loss=0.1265 critic_loss=128771383296.0000 entropy=3.7060 ent_coef=0.001937 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 7040] reward=-59607566.0 actor_loss=0.1338 critic_loss=132952494395.0769 entropy=3.7071 ent_coef=0.001937 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 7040] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-282426.4 mean_steps=15.6
|
|
[Episode 7050] reward=-47168480.4 actor_loss=0.0967 critic_loss=122603882086.4000 entropy=3.7081 ent_coef=0.001937 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 7060] reward=-56173568.0 actor_loss=0.1273 critic_loss=122144306790.4000 entropy=3.7123 ent_coef=0.001936 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 7060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-319460.4 mean_steps=14.2
|
|
[Episode 7070] reward=-43437917.3 actor_loss=0.1056 critic_loss=115485728182.8571 entropy=3.7082 ent_coef=0.001936 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 7080] reward=-61864971.3 actor_loss=0.1225 critic_loss=130945646592.0000 entropy=3.7076 ent_coef=0.001936 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 7080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-388811.5 mean_steps=14.3
|
|
[Episode 7090] reward=-57442785.7 actor_loss=0.1244 critic_loss=129040999424.0000 entropy=3.7056 ent_coef=0.001936 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 7100] reward=-50644653.1 actor_loss=0.0928 critic_loss=128158878378.6667 entropy=3.7071 ent_coef=0.001936 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 7100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-590060.0 mean_steps=12.8
|
|
[Episode 7110] reward=-60401974.6 actor_loss=0.0743 critic_loss=129986478080.0000 entropy=3.7096 ent_coef=0.001936 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 7120] reward=-62661059.0 actor_loss=0.1260 critic_loss=126854422528.0000 entropy=3.7087 ent_coef=0.001936 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 7120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463247.4 mean_steps=13.6
|
|
[Episode 7130] reward=-52757905.5 actor_loss=0.1009 critic_loss=122688081547.6364 entropy=3.7061 ent_coef=0.001936 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 7140] reward=-54139712.0 actor_loss=0.0943 critic_loss=125716832256.0000 entropy=3.7085 ent_coef=0.001936 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 7140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-356741.3 mean_steps=14.6
|
|
[Episode 7150] reward=-47909920.0 actor_loss=0.1008 critic_loss=123551236096.0000 entropy=3.7078 ent_coef=0.001936 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 7160] reward=-49555291.6 actor_loss=0.1085 critic_loss=121423229300.3636 entropy=3.7068 ent_coef=0.001936 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 7160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-487941.2 mean_steps=14.6
|
|
[Episode 7170] reward=-56655491.6 actor_loss=0.1164 critic_loss=128359725056.0000 entropy=3.7013 ent_coef=0.001935 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 7180] reward=-63400569.0 actor_loss=0.1267 critic_loss=127001534464.0000 entropy=3.7017 ent_coef=0.001935 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 7180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502248.6 mean_steps=13.0
|
|
[Episode 7190] reward=-49559999.1 actor_loss=0.0796 critic_loss=123949688149.3333 entropy=3.7002 ent_coef=0.001935 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 7200] reward=-50083456.3 actor_loss=0.0902 critic_loss=120258195828.3636 entropy=3.6960 ent_coef=0.001935 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 7200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-595676.3 mean_steps=11.3
|
|
[Episode 7210] reward=-56955978.3 actor_loss=0.1035 critic_loss=123637863517.0909 entropy=3.6968 ent_coef=0.001935 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 7220] reward=-52524031.7 actor_loss=0.1151 critic_loss=123344977920.0000 entropy=3.6991 ent_coef=0.001935 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 7220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-482681.3 mean_steps=12.9
|
|
[Episode 7230] reward=-63302979.2 actor_loss=0.1238 critic_loss=133351294417.4545 entropy=3.6980 ent_coef=0.001935 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 7240] reward=-59701035.6 actor_loss=0.1273 critic_loss=126478617941.3333 entropy=3.6956 ent_coef=0.001935 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 7240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486361.2 mean_steps=13.5
|
|
[Episode 7250] reward=-53480528.4 actor_loss=0.0982 critic_loss=126828148326.4000 entropy=3.6942 ent_coef=0.001935 approx_kl=0.0001 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 7260] reward=-51201531.1 actor_loss=0.1094 critic_loss=123838790509.7143 entropy=3.6977 ent_coef=0.001935 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 7260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432415.1 mean_steps=14.4
|
|
[Episode 7270] reward=-62909571.2 actor_loss=0.1263 critic_loss=126327437019.4286 entropy=3.7002 ent_coef=0.001935 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 7280] reward=-52730508.8 actor_loss=0.1258 critic_loss=120652646400.0000 entropy=3.7015 ent_coef=0.001934 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 7280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-341507.9 mean_steps=14.3
|
|
[Episode 7290] reward=-42789921.1 actor_loss=0.1014 critic_loss=118836094566.4000 entropy=3.7009 ent_coef=0.001934 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 7300] reward=-51276605.3 actor_loss=0.1090 critic_loss=123325575850.6667 entropy=3.6985 ent_coef=0.001934 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 7300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-441239.5 mean_steps=12.5
|
|
[Episode 7310] reward=-45890183.9 actor_loss=0.1127 critic_loss=121296959301.8182 entropy=3.6989 ent_coef=0.001934 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 7320] reward=-59638771.2 actor_loss=0.1131 critic_loss=127777744896.0000 entropy=3.6947 ent_coef=0.001934 approx_kl=0.0004 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 7320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465205.9 mean_steps=13.7
|
|
[Episode 7330] reward=-50897575.7 actor_loss=0.1019 critic_loss=122847379456.0000 entropy=3.6944 ent_coef=0.001934 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 7340] reward=-55499772.2 actor_loss=0.1128 critic_loss=127286684330.6667 entropy=3.6939 ent_coef=0.001934 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 7340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-590177.3 mean_steps=12.6
|
|
[Episode 7350] reward=-53920497.3 actor_loss=0.1127 critic_loss=124424042023.3846 entropy=3.6957 ent_coef=0.001934 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 7360] reward=-50508817.3 actor_loss=0.0994 critic_loss=121718184618.6667 entropy=3.6946 ent_coef=0.001934 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 7360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-335122.5 mean_steps=13.2
|
|
[Episode 7370] reward=-58948476.2 actor_loss=0.1029 critic_loss=128669147136.0000 entropy=3.6953 ent_coef=0.001934 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 7380] reward=-61596223.1 actor_loss=0.1315 critic_loss=127801878016.0000 entropy=3.6969 ent_coef=0.001934 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 7380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-491503.2 mean_steps=12.9
|
|
[Episode 7390] reward=-51566064.6 actor_loss=0.1050 critic_loss=123596945408.0000 entropy=3.6970 ent_coef=0.001933 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 7400] reward=-53526886.0 actor_loss=0.1270 critic_loss=123246670039.5789 entropy=3.6975 ent_coef=0.001933 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 7400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-376610.1 mean_steps=13.7
|
|
[Episode 7410] reward=-57396498.2 actor_loss=0.1467 critic_loss=130713645428.3636 entropy=3.6990 ent_coef=0.001933 approx_kl=0.0011 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Episode 7420] reward=-52667780.2 actor_loss=0.1305 critic_loss=124220769894.4000 entropy=3.6950 ent_coef=0.001933 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 7420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-388076.9 mean_steps=14.6
|
|
[Episode 7430] reward=-48445498.5 actor_loss=0.1270 critic_loss=123242882844.4444 entropy=3.6963 ent_coef=0.001933 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 7440] reward=-56269484.2 actor_loss=0.1307 critic_loss=125070958592.0000 entropy=3.6967 ent_coef=0.001933 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 7440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-531300.5 mean_steps=11.6
|
|
[Episode 7450] reward=-49600240.5 actor_loss=0.1256 critic_loss=120057705472.0000 entropy=3.6910 ent_coef=0.001933 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 7460] reward=-56750479.7 actor_loss=0.1393 critic_loss=125650020124.4444 entropy=3.6883 ent_coef=0.001933 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Eval 7460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426579.7 mean_steps=14.0
|
|
[Episode 7470] reward=-56686804.7 actor_loss=0.1089 critic_loss=128316068212.3636 entropy=3.6912 ent_coef=0.001933 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 7480] reward=-56351636.0 actor_loss=0.1053 critic_loss=126616672069.8182 entropy=3.6915 ent_coef=0.001933 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 7480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483398.8 mean_steps=13.1
|
|
[Episode 7490] reward=-65449764.8 actor_loss=0.1450 critic_loss=133718773760.0000 entropy=3.6913 ent_coef=0.001933 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 7500] reward=-50165501.0 actor_loss=0.1113 critic_loss=123651534028.8000 entropy=3.6915 ent_coef=0.001933 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 7500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-452079.1 mean_steps=12.8
|
|
[Episode 7510] reward=-65019518.0 actor_loss=0.0992 critic_loss=135395999744.0000 entropy=3.6913 ent_coef=0.001932 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 7520] reward=-57707121.4 actor_loss=0.1048 critic_loss=129441937408.0000 entropy=3.6867 ent_coef=0.001932 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 7520] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509408.5 mean_steps=13.0
|
|
[Episode 7530] reward=-57552918.2 actor_loss=0.1128 critic_loss=121517805112.8889 entropy=3.6844 ent_coef=0.001932 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 7540] reward=-38413021.5 actor_loss=0.0854 critic_loss=114593037165.7143 entropy=3.6856 ent_coef=0.001932 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 7540] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479857.2 mean_steps=13.6
|
|
[Episode 7550] reward=-51183921.5 actor_loss=0.0763 critic_loss=120772345111.2727 entropy=3.6849 ent_coef=0.001932 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 7560] reward=-58855079.7 actor_loss=0.1280 critic_loss=125253744640.0000 entropy=3.6831 ent_coef=0.001932 approx_kl=-0.0001 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 7560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501375.6 mean_steps=13.1
|
|
[Episode 7570] reward=-49266745.1 actor_loss=0.1166 critic_loss=121151891456.0000 entropy=3.6833 ent_coef=0.001932 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 7580] reward=-66532523.8 actor_loss=0.1463 critic_loss=133094922240.0000 entropy=3.6816 ent_coef=0.001932 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 7580] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-377730.6 mean_steps=15.2
|
|
[Episode 7590] reward=-53599338.6 actor_loss=0.1403 critic_loss=128040134842.1818 entropy=3.6822 ent_coef=0.001932 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 7600] reward=-48833361.3 actor_loss=0.1174 critic_loss=120712604330.6667 entropy=3.6819 ent_coef=0.001932 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 7600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-431058.9 mean_steps=14.8
|
|
[Episode 7610] reward=-53029645.2 actor_loss=0.1477 critic_loss=125824472615.3846 entropy=3.6816 ent_coef=0.001932 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 7620] reward=-52865807.4 actor_loss=0.1131 critic_loss=128815186664.7273 entropy=3.6817 ent_coef=0.001931 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 7620] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-333008.6 mean_steps=15.0
|
|
[Episode 7630] reward=-61668319.6 actor_loss=0.0994 critic_loss=126629321841.7778 entropy=3.6808 ent_coef=0.001931 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 7640] reward=-49630105.2 actor_loss=0.1279 critic_loss=125266012160.0000 entropy=3.6801 ent_coef=0.001931 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 7640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483522.8 mean_steps=12.7
|
|
[Episode 7650] reward=-53552947.2 actor_loss=0.0945 critic_loss=125339939188.3636 entropy=3.6804 ent_coef=0.001931 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 7660] reward=-44745686.5 actor_loss=0.1047 critic_loss=120813628074.6667 entropy=3.6806 ent_coef=0.001931 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 7660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493380.6 mean_steps=13.0
|
|
[Episode 7670] reward=-49205295.7 actor_loss=0.0873 critic_loss=124054872064.0000 entropy=3.6792 ent_coef=0.001931 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 7680] reward=-50631416.4 actor_loss=0.1368 critic_loss=122480679789.7143 entropy=3.6809 ent_coef=0.001931 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 7680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-449756.9 mean_steps=14.2
|
|
[Episode 7690] reward=-55309169.0 actor_loss=0.0968 critic_loss=124438823936.0000 entropy=3.6800 ent_coef=0.001931 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 7700] reward=-57348261.4 actor_loss=0.1110 critic_loss=128917754973.0909 entropy=3.6811 ent_coef=0.001931 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 7700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-577003.1 mean_steps=12.3
|
|
[Episode 7710] reward=-55942577.0 actor_loss=0.1089 critic_loss=124655151513.6000 entropy=3.6803 ent_coef=0.001931 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 7720] reward=-37399592.7 actor_loss=0.0995 critic_loss=113830656558.5455 entropy=3.6794 ent_coef=0.001931 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 7720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520945.9 mean_steps=13.2
|
|
[Episode 7730] reward=-49530211.1 actor_loss=0.1155 critic_loss=120405701632.0000 entropy=3.6742 ent_coef=0.001930 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 7740] reward=-59201913.5 actor_loss=0.1258 critic_loss=129519513972.3636 entropy=3.6733 ent_coef=0.001930 approx_kl=-0.0007 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 7740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-378350.4 mean_steps=13.8
|
|
[Episode 7750] reward=-56772228.4 actor_loss=0.1278 critic_loss=129490578090.6667 entropy=3.6742 ent_coef=0.001930 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 7760] reward=-53809596.2 actor_loss=0.1176 critic_loss=123454143692.8000 entropy=3.6721 ent_coef=0.001930 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 7760] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-303729.3 mean_steps=14.1
|
|
[Episode 7770] reward=-46709553.7 actor_loss=0.0906 critic_loss=124196102826.6667 entropy=3.6720 ent_coef=0.001930 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 7780] reward=-57146059.7 actor_loss=0.1015 critic_loss=129085227008.0000 entropy=3.6718 ent_coef=0.001930 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 7780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-572045.6 mean_steps=12.1
|
|
[Episode 7790] reward=-54811240.9 actor_loss=0.1338 critic_loss=126218488100.5714 entropy=3.6773 ent_coef=0.001930 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 7800] reward=-58287667.9 actor_loss=0.1139 critic_loss=127474104133.8182 entropy=3.6795 ent_coef=0.001930 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 7800] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-340167.0 mean_steps=15.1
|
|
[Episode 7810] reward=-51657907.7 actor_loss=0.1322 critic_loss=123476711424.0000 entropy=3.6791 ent_coef=0.001930 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 7820] reward=-46932425.8 actor_loss=0.0980 critic_loss=118111261789.0909 entropy=3.6782 ent_coef=0.001930 approx_kl=0.0011 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 7820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448260.0 mean_steps=13.2
|
|
[Episode 7830] reward=-68334841.3 actor_loss=0.1416 critic_loss=130697026673.7778 entropy=3.6797 ent_coef=0.001930 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 7840] reward=-55043450.5 actor_loss=0.0958 critic_loss=126809554124.8000 entropy=3.6822 ent_coef=0.001929 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 7840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-446789.9 mean_steps=12.5
|
|
[Episode 7850] reward=-48498792.5 actor_loss=0.1114 critic_loss=121074921984.0000 entropy=3.6811 ent_coef=0.001929 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 7860] reward=-49183466.6 actor_loss=0.1154 critic_loss=123255686940.4444 entropy=3.6803 ent_coef=0.001929 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 7860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482026.3 mean_steps=13.7
|
|
[Episode 7870] reward=-51427112.1 actor_loss=0.0950 critic_loss=124467727798.8571 entropy=3.6832 ent_coef=0.001929 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 7880] reward=-40881519.9 actor_loss=0.0951 critic_loss=118218763832.8889 entropy=3.6797 ent_coef=0.001929 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 7880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421267.2 mean_steps=13.9
|
|
[Episode 7890] reward=-47840118.1 actor_loss=0.1213 critic_loss=119506687317.3333 entropy=3.6785 ent_coef=0.001929 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 7900] reward=-48274959.0 actor_loss=0.0949 critic_loss=120784619706.1818 entropy=3.6802 ent_coef=0.001929 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 7900] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-695853.1 mean_steps=10.3
|
|
[Episode 7910] reward=-52420157.2 actor_loss=0.1298 critic_loss=126398386761.1429 entropy=3.6779 ent_coef=0.001929 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 7920] reward=-53865776.1 actor_loss=0.1131 critic_loss=130801315840.0000 entropy=3.6783 ent_coef=0.001929 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 7920] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-571753.3 mean_steps=11.2
|
|
[Episode 7930] reward=-49929633.7 actor_loss=0.1097 critic_loss=119622114417.7778 entropy=3.6774 ent_coef=0.001929 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 7940] reward=-55364446.1 actor_loss=0.1263 critic_loss=124969731218.2857 entropy=3.6802 ent_coef=0.001929 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 7940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-574048.4 mean_steps=11.4
|
|
[Episode 7950] reward=-64643001.6 actor_loss=0.1328 critic_loss=131508252216.8889 entropy=3.6786 ent_coef=0.001928 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Episode 7960] reward=-57370443.9 actor_loss=0.1375 critic_loss=124694410581.3333 entropy=3.6780 ent_coef=0.001928 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 7960] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-550047.4 mean_steps=11.9
|
|
[Episode 7970] reward=-54303688.7 actor_loss=0.0919 critic_loss=124507054824.7273 entropy=3.6746 ent_coef=0.001928 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 7980] reward=-43734039.6 actor_loss=0.0796 critic_loss=119463369728.0000 entropy=3.6710 ent_coef=0.001928 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 7980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-359182.2 mean_steps=14.1
|
|
[Episode 7990] reward=-52172574.8 actor_loss=0.1190 critic_loss=124360899925.3333 entropy=3.6714 ent_coef=0.001928 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 8000] reward=-66708980.2 actor_loss=0.1176 critic_loss=131540466073.6000 entropy=3.6711 ent_coef=0.001928 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 8000] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-338272.8 mean_steps=14.9
|
|
[Episode 8010] reward=-59670185.1 actor_loss=0.1194 critic_loss=130806698803.2000 entropy=3.6709 ent_coef=0.001928 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 8020] reward=-56463184.3 actor_loss=0.1248 critic_loss=129257718930.2857 entropy=3.6741 ent_coef=0.001928 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 8020] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-333564.0 mean_steps=14.2
|
|
[Episode 8030] reward=-43340954.8 actor_loss=0.1119 critic_loss=118110457400.8889 entropy=3.6721 ent_coef=0.001928 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 8040] reward=-56247764.3 actor_loss=0.0917 critic_loss=125930804019.2000 entropy=3.6711 ent_coef=0.001928 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 8040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523963.4 mean_steps=12.2
|
|
[Episode 8050] reward=-61986931.7 actor_loss=0.0973 critic_loss=131122264746.6667 entropy=3.6696 ent_coef=0.001928 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 8060] reward=-57337704.8 actor_loss=0.1157 critic_loss=122600759751.1111 entropy=3.6669 ent_coef=0.001927 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 8060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-368986.0 mean_steps=14.4
|
|
[Episode 8070] reward=-46709339.2 actor_loss=0.1191 critic_loss=122783111577.6000 entropy=3.6695 ent_coef=0.001927 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 8080] reward=-57810130.2 actor_loss=0.1209 critic_loss=128568248320.0000 entropy=3.6716 ent_coef=0.001927 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 8080] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-355854.1 mean_steps=14.9
|
|
[Episode 8090] reward=-61790973.1 actor_loss=0.1142 critic_loss=131090861764.9231 entropy=3.6714 ent_coef=0.001927 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 8100] reward=-45922855.5 actor_loss=0.1121 critic_loss=121537576277.3333 entropy=3.6716 ent_coef=0.001927 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 8100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-411279.8 mean_steps=13.1
|
|
[Episode 8110] reward=-51916203.1 actor_loss=0.1081 critic_loss=124553166848.0000 entropy=3.6657 ent_coef=0.001927 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 8120] reward=-50394551.4 actor_loss=0.1060 critic_loss=124573962240.0000 entropy=3.6630 ent_coef=0.001927 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 8120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-620005.8 mean_steps=12.3
|
|
[Episode 8130] reward=-49683784.8 actor_loss=0.1029 critic_loss=123285970944.0000 entropy=3.6598 ent_coef=0.001927 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 8140] reward=-57655678.4 actor_loss=0.1148 critic_loss=126921351987.2000 entropy=3.6581 ent_coef=0.001927 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 8140] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-517279.1 mean_steps=12.3
|
|
[Episode 8150] reward=-55246819.5 actor_loss=0.1119 critic_loss=125720545280.0000 entropy=3.6559 ent_coef=0.001927 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 8160] reward=-52408178.5 actor_loss=0.1069 critic_loss=123719342080.0000 entropy=3.6547 ent_coef=0.001927 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 8160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-606359.4 mean_steps=12.2
|
|
[Episode 8170] reward=-52089907.3 actor_loss=0.1315 critic_loss=124859478388.3636 entropy=3.6540 ent_coef=0.001926 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 8180] reward=-55165764.4 actor_loss=0.1330 critic_loss=127115115633.7778 entropy=3.6588 ent_coef=0.001926 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 8180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-421217.1 mean_steps=13.9
|
|
[Episode 8190] reward=-55927225.7 actor_loss=0.1175 critic_loss=125532489728.0000 entropy=3.6597 ent_coef=0.001926 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 8200] reward=-56269656.0 actor_loss=0.1339 critic_loss=128549494784.0000 entropy=3.6604 ent_coef=0.001926 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Eval 8200] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-563526.6 mean_steps=11.1
|
|
[Episode 8210] reward=-62771256.8 actor_loss=0.1267 critic_loss=129553856512.0000 entropy=3.6552 ent_coef=0.001926 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 8220] reward=-64554407.8 actor_loss=0.1013 critic_loss=131197495296.0000 entropy=3.6534 ent_coef=0.001926 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 8220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-570847.1 mean_steps=12.8
|
|
[Episode 8230] reward=-54230935.8 actor_loss=0.1057 critic_loss=124325868544.0000 entropy=3.6542 ent_coef=0.001926 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 8240] reward=-54791032.3 actor_loss=0.0981 critic_loss=126034744241.2308 entropy=3.6504 ent_coef=0.001926 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 8240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552195.8 mean_steps=12.2
|
|
[Episode 8250] reward=-53456449.3 actor_loss=0.1145 critic_loss=122387360426.6667 entropy=3.6474 ent_coef=0.001926 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 8260] reward=-50047213.2 actor_loss=0.1000 critic_loss=126325845333.3333 entropy=3.6491 ent_coef=0.001926 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Eval 8260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-554435.9 mean_steps=11.7
|
|
[Episode 8270] reward=-54923073.0 actor_loss=0.0882 critic_loss=124060447539.2000 entropy=3.6523 ent_coef=0.001926 approx_kl=-0.0002 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 8280] reward=-61189148.0 actor_loss=0.1132 critic_loss=130632337700.5714 entropy=3.6526 ent_coef=0.001925 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 8280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-485877.2 mean_steps=12.4
|
|
[Episode 8290] reward=-66535461.3 actor_loss=0.1277 critic_loss=133307346944.0000 entropy=3.6492 ent_coef=0.001925 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Episode 8300] reward=-49265589.7 actor_loss=0.0872 critic_loss=124604556902.4000 entropy=3.6530 ent_coef=0.001925 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 8300] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-671911.6 mean_steps=10.2
|
|
[Episode 8310] reward=-51298000.8 actor_loss=0.0592 critic_loss=120736466534.4000 entropy=3.6516 ent_coef=0.001925 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 8320] reward=-64123758.4 actor_loss=0.1318 critic_loss=134290968064.0000 entropy=3.6536 ent_coef=0.001925 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 8320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504310.6 mean_steps=13.3
|
|
[Episode 8330] reward=-53854837.4 actor_loss=0.1168 critic_loss=124035356829.5385 entropy=3.6539 ent_coef=0.001925 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 8340] reward=-53784268.2 actor_loss=0.0838 critic_loss=128394795008.0000 entropy=3.6502 ent_coef=0.001925 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 8340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-432475.0 mean_steps=13.6
|
|
[Episode 8350] reward=-55449208.7 actor_loss=0.0964 critic_loss=128428734464.0000 entropy=3.6510 ent_coef=0.001925 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 8360] reward=-58365057.6 actor_loss=0.1095 critic_loss=129854374912.0000 entropy=3.6501 ent_coef=0.001925 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 8360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457108.0 mean_steps=13.6
|
|
[Episode 8370] reward=-56973957.5 actor_loss=0.1317 critic_loss=128833339392.0000 entropy=3.6471 ent_coef=0.001925 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 8380] reward=-45853926.1 actor_loss=0.0994 critic_loss=120462419285.3333 entropy=3.6424 ent_coef=0.001925 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 8380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-437694.0 mean_steps=12.6
|
|
[Episode 8390] reward=-55163682.3 actor_loss=0.0913 critic_loss=125636609638.4000 entropy=3.6378 ent_coef=0.001924 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 8400] reward=-53798656.8 actor_loss=0.1145 critic_loss=124607370791.3846 entropy=3.6357 ent_coef=0.001924 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 8400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-377904.2 mean_steps=13.7
|
|
[Episode 8410] reward=-53259482.6 actor_loss=0.1293 critic_loss=124498333696.0000 entropy=3.6324 ent_coef=0.001924 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 8420] reward=-65269398.5 actor_loss=0.1058 critic_loss=134234220134.4000 entropy=3.6289 ent_coef=0.001924 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 8420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-521880.8 mean_steps=12.2
|
|
[Episode 8430] reward=-43944343.4 actor_loss=0.1097 critic_loss=122666292955.4286 entropy=3.6300 ent_coef=0.001924 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 8440] reward=-50774444.4 actor_loss=0.0924 critic_loss=125683113073.7778 entropy=3.6278 ent_coef=0.001924 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 8440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-488733.9 mean_steps=13.9
|
|
[Episode 8450] reward=-62310537.3 actor_loss=0.1037 critic_loss=129125021509.8182 entropy=3.6242 ent_coef=0.001924 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 8460] reward=-52880532.9 actor_loss=0.0991 critic_loss=128216428088.8889 entropy=3.6229 ent_coef=0.001924 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 8460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484808.9 mean_steps=12.8
|
|
[Episode 8470] reward=-52615787.5 actor_loss=0.1209 critic_loss=125210109542.4000 entropy=3.6241 ent_coef=0.001924 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 8480] reward=-53070535.0 actor_loss=0.0800 critic_loss=124536351402.6667 entropy=3.6233 ent_coef=0.001924 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 8480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431529.4 mean_steps=14.1
|
|
[Episode 8490] reward=-59285090.4 actor_loss=0.1196 critic_loss=128329480005.8182 entropy=3.6208 ent_coef=0.001924 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 8500] reward=-62935038.3 actor_loss=0.1144 critic_loss=128819589705.1429 entropy=3.6178 ent_coef=0.001924 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 8500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-512743.1 mean_steps=12.1
|
|
[Episode 8510] reward=-56462338.2 actor_loss=0.1165 critic_loss=127402963626.6667 entropy=3.6163 ent_coef=0.001923 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 8520] reward=-56760410.3 actor_loss=0.1045 critic_loss=127165150822.4000 entropy=3.6173 ent_coef=0.001923 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 8520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-346715.1 mean_steps=14.2
|
|
[Episode 8530] reward=-54943592.8 actor_loss=0.1148 critic_loss=124614536630.8571 entropy=3.6169 ent_coef=0.001923 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 8540] reward=-53868862.3 actor_loss=0.1079 critic_loss=126136573952.0000 entropy=3.6177 ent_coef=0.001923 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 8540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-365859.1 mean_steps=13.4
|
|
[Episode 8550] reward=-63621643.7 actor_loss=0.1116 critic_loss=131070104371.2000 entropy=3.6157 ent_coef=0.001923 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 8560] reward=-53168868.0 actor_loss=0.1513 critic_loss=126487908352.0000 entropy=3.6180 ent_coef=0.001923 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 8560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-389219.7 mean_steps=13.8
|
|
[Episode 8570] reward=-57370733.0 actor_loss=0.1409 critic_loss=123238989824.0000 entropy=3.6183 ent_coef=0.001923 approx_kl=0.0006 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 8580] reward=-58451729.5 actor_loss=0.1008 critic_loss=127820554240.0000 entropy=3.6166 ent_coef=0.001923 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 8580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-509986.8 mean_steps=12.3
|
|
[Episode 8590] reward=-58007976.2 actor_loss=0.1169 critic_loss=125762108620.8000 entropy=3.6135 ent_coef=0.001923 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 8600] reward=-58216119.1 actor_loss=0.1119 critic_loss=125045574860.8000 entropy=3.6120 ent_coef=0.001923 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 8600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553174.2 mean_steps=12.6
|
|
[Episode 8610] reward=-61861968.6 actor_loss=0.1060 critic_loss=134441105408.0000 entropy=3.6105 ent_coef=0.001923 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 8620] reward=-50151070.9 actor_loss=0.1279 critic_loss=122815012317.8667 entropy=3.6069 ent_coef=0.001922 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 8620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-485247.3 mean_steps=13.0
|
|
[Episode 8630] reward=-51487501.7 actor_loss=0.1165 critic_loss=122842782573.7143 entropy=3.6039 ent_coef=0.001922 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 8640] reward=-56633737.7 actor_loss=0.1364 critic_loss=126303186358.8571 entropy=3.6053 ent_coef=0.001922 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 8640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-523609.3 mean_steps=12.5
|
|
[Episode 8650] reward=-57603428.4 actor_loss=0.1254 critic_loss=127520104448.0000 entropy=3.6016 ent_coef=0.001922 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 8660] reward=-63961943.7 actor_loss=0.1351 critic_loss=129415247189.3333 entropy=3.6010 ent_coef=0.001922 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Eval 8660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-434457.1 mean_steps=12.4
|
|
[Episode 8670] reward=-58918772.3 actor_loss=0.1220 critic_loss=126452440064.0000 entropy=3.6000 ent_coef=0.001922 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Episode 8680] reward=-67714386.5 actor_loss=0.1160 critic_loss=131066231193.6000 entropy=3.6005 ent_coef=0.001922 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Eval 8680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422686.9 mean_steps=14.0
|
|
[Episode 8690] reward=-54603659.7 actor_loss=0.1393 critic_loss=126823374848.0000 entropy=3.6006 ent_coef=0.001922 approx_kl=0.0011 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 8700] reward=-48866108.4 actor_loss=0.0814 critic_loss=120829837312.0000 entropy=3.6015 ent_coef=0.001922 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 8700] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-717660.6 mean_steps=11.5
|
|
[Episode 8710] reward=-58375982.5 actor_loss=0.1417 critic_loss=129034802517.3333 entropy=3.6012 ent_coef=0.001922 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 8720] reward=-53584771.2 actor_loss=0.0885 critic_loss=122526471509.3333 entropy=3.6005 ent_coef=0.001922 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 8720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-507011.2 mean_steps=13.6
|
|
[Episode 8730] reward=-58264926.2 actor_loss=0.1091 critic_loss=124916517050.1818 entropy=3.6012 ent_coef=0.001921 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 8740] reward=-56020820.0 actor_loss=0.1341 critic_loss=128882537813.3333 entropy=3.5996 ent_coef=0.001921 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 8740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-537810.1 mean_steps=13.1
|
|
[Episode 8750] reward=-51317675.6 actor_loss=0.0978 critic_loss=124467733650.2857 entropy=3.5948 ent_coef=0.001921 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 8760] reward=-45912083.2 actor_loss=0.1067 critic_loss=123306369843.2000 entropy=3.5958 ent_coef=0.001921 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 8760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484645.8 mean_steps=13.1
|
|
[Episode 8770] reward=-56834404.0 actor_loss=0.0996 critic_loss=128407841645.7143 entropy=3.5940 ent_coef=0.001921 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 8780] reward=-48832002.9 actor_loss=0.0983 critic_loss=119419063864.8889 entropy=3.5921 ent_coef=0.001921 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 8780] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511820.5 mean_steps=13.2
|
|
[Episode 8790] reward=-49821077.3 actor_loss=0.1053 critic_loss=120620635714.7826 entropy=3.5884 ent_coef=0.001921 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 8800] reward=-55554140.0 actor_loss=0.0963 critic_loss=126972954916.5714 entropy=3.5887 ent_coef=0.001921 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 8800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599867.2 mean_steps=12.3
|
|
[Episode 8810] reward=-53729792.0 actor_loss=0.1337 critic_loss=125845388141.7143 entropy=3.5878 ent_coef=0.001921 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 8820] reward=-51356150.3 actor_loss=0.1059 critic_loss=123336724024.8889 entropy=3.5838 ent_coef=0.001921 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 8820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559100.5 mean_steps=11.9
|
|
[Episode 8830] reward=-63408198.9 actor_loss=0.1207 critic_loss=128305527661.7143 entropy=3.5842 ent_coef=0.001921 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 8840] reward=-56732334.2 actor_loss=0.1285 critic_loss=125753709363.2000 entropy=3.5882 ent_coef=0.001920 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 8840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-421786.9 mean_steps=12.6
|
|
[Episode 8850] reward=-45638262.1 actor_loss=0.1150 critic_loss=119746416640.0000 entropy=3.5889 ent_coef=0.001920 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 8860] reward=-41429562.4 actor_loss=0.0764 critic_loss=119158321971.2000 entropy=3.5888 ent_coef=0.001920 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Eval 8860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-377928.9 mean_steps=14.5
|
|
[Episode 8870] reward=-50879965.4 actor_loss=0.1101 critic_loss=123511130794.6667 entropy=3.5864 ent_coef=0.001920 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 8880] reward=-52196399.3 actor_loss=0.1222 critic_loss=127433349277.5385 entropy=3.5863 ent_coef=0.001920 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 8880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-445896.8 mean_steps=14.3
|
|
[Episode 8890] reward=-57011177.6 actor_loss=0.1128 critic_loss=123174118741.3333 entropy=3.5849 ent_coef=0.001920 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 8900] reward=-57522988.3 actor_loss=0.1020 critic_loss=128487701708.8000 entropy=3.5824 ent_coef=0.001920 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 8900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-357364.4 mean_steps=14.2
|
|
[Episode 8910] reward=-61752901.1 actor_loss=0.1219 critic_loss=131115956633.6000 entropy=3.5823 ent_coef=0.001920 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 8920] reward=-58741185.0 actor_loss=0.1039 critic_loss=132951482368.0000 entropy=3.5818 ent_coef=0.001920 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 8920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-456775.0 mean_steps=12.6
|
|
[Episode 8930] reward=-43217943.5 actor_loss=0.0979 critic_loss=119765451233.8824 entropy=3.5795 ent_coef=0.001920 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 8940] reward=-54886089.1 actor_loss=0.1060 critic_loss=125859390929.4545 entropy=3.5734 ent_coef=0.001920 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 8940] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-187899.0 mean_steps=15.5
|
|
[Episode 8950] reward=-49709393.2 actor_loss=0.1350 critic_loss=121826520064.0000 entropy=3.5737 ent_coef=0.001919 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 8960] reward=-59552160.4 actor_loss=0.1311 critic_loss=127788131942.4000 entropy=3.5766 ent_coef=0.001919 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 8960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-500497.4 mean_steps=12.3
|
|
[Episode 8970] reward=-45993595.4 actor_loss=0.0927 critic_loss=121829698764.8000 entropy=3.5787 ent_coef=0.001919 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 8980] reward=-55956780.7 actor_loss=0.1377 critic_loss=126181311556.2667 entropy=3.5775 ent_coef=0.001919 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 8980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462507.2 mean_steps=12.8
|
|
[Episode 8990] reward=-51835281.0 actor_loss=0.1145 critic_loss=122493482325.3333 entropy=3.5770 ent_coef=0.001919 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 9000] reward=-59659588.3 actor_loss=0.0866 critic_loss=129056201581.7143 entropy=3.5761 ent_coef=0.001919 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 9000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-451170.6 mean_steps=14.3
|
|
[Episode 9010] reward=-60753642.9 actor_loss=0.1439 critic_loss=130097739483.4286 entropy=3.5790 ent_coef=0.001919 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 9020] reward=-56104799.7 actor_loss=0.1120 critic_loss=129434548451.5556 entropy=3.5767 ent_coef=0.001919 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 9020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-543068.3 mean_steps=12.5
|
|
[Episode 9030] reward=-52887780.4 actor_loss=0.1200 critic_loss=127027609088.0000 entropy=3.5766 ent_coef=0.001919 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 9040] reward=-51162422.7 actor_loss=0.1028 critic_loss=122871039441.4545 entropy=3.5794 ent_coef=0.001919 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 9040] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539245.9 mean_steps=12.8
|
|
[Episode 9050] reward=-47851802.9 actor_loss=0.0965 critic_loss=124572852809.1429 entropy=3.5789 ent_coef=0.001919 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 9060] reward=-57652632.3 actor_loss=0.1354 critic_loss=125964278784.0000 entropy=3.5802 ent_coef=0.001918 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 9060] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-362315.0 mean_steps=14.3
|
|
[Episode 9070] reward=-60009357.7 actor_loss=0.1195 critic_loss=124181018851.5556 entropy=3.5789 ent_coef=0.001918 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Episode 9080] reward=-58671026.5 actor_loss=0.1485 critic_loss=127588453580.8000 entropy=3.5803 ent_coef=0.001918 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 9080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-440646.1 mean_steps=14.2
|
|
[Episode 9090] reward=-65555648.6 actor_loss=0.1307 critic_loss=130700543044.2667 entropy=3.5795 ent_coef=0.001918 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0977 front_blocked=0
|
|
[Episode 9100] reward=-51010392.6 actor_loss=0.1134 critic_loss=125326577208.8889 entropy=3.5749 ent_coef=0.001918 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 9100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474531.5 mean_steps=13.7
|
|
[Episode 9110] reward=-63007978.5 actor_loss=0.1170 critic_loss=133962157260.8000 entropy=3.5737 ent_coef=0.001918 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Episode 9120] reward=-59175450.1 actor_loss=0.1432 critic_loss=127155503104.0000 entropy=3.5781 ent_coef=0.001918 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 9120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-451629.2 mean_steps=12.7
|
|
[Episode 9130] reward=-52915951.7 actor_loss=0.1283 critic_loss=121007165440.0000 entropy=3.5790 ent_coef=0.001918 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 9140] reward=-48871752.5 actor_loss=0.1081 critic_loss=122046749696.0000 entropy=3.5792 ent_coef=0.001918 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 9140] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-385304.6 mean_steps=13.8
|
|
[Episode 9150] reward=-53826970.4 actor_loss=0.1063 critic_loss=124435079168.0000 entropy=3.5785 ent_coef=0.001918 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 9160] reward=-63506487.3 actor_loss=0.1253 critic_loss=130817029461.3333 entropy=3.5821 ent_coef=0.001918 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 9160] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-530942.4 mean_steps=11.0
|
|
[Episode 9170] reward=-43121907.7 actor_loss=0.0994 critic_loss=119055149242.1818 entropy=3.5820 ent_coef=0.001917 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 9180] reward=-64309874.5 actor_loss=0.1091 critic_loss=128405212598.8571 entropy=3.5823 ent_coef=0.001917 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 9180] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-270736.3 mean_steps=15.9
|
|
[Episode 9190] reward=-55208428.0 actor_loss=0.1223 critic_loss=124330955067.0769 entropy=3.5818 ent_coef=0.001917 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Episode 9200] reward=-51255096.6 actor_loss=0.1093 critic_loss=124351219898.1818 entropy=3.5854 ent_coef=0.001917 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 9200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536059.0 mean_steps=12.6
|
|
[Episode 9210] reward=-46263342.1 actor_loss=0.1092 critic_loss=121933135872.0000 entropy=3.5840 ent_coef=0.001917 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 9220] reward=-54237864.8 actor_loss=0.1149 critic_loss=124755317760.0000 entropy=3.5804 ent_coef=0.001917 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 9220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-416242.7 mean_steps=14.7
|
|
[Episode 9230] reward=-48392667.0 actor_loss=0.1154 critic_loss=124480138444.8000 entropy=3.5797 ent_coef=0.001917 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 9240] reward=-53542945.2 actor_loss=0.1088 critic_loss=124444428424.5333 entropy=3.5806 ent_coef=0.001917 approx_kl=0.0007 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 9240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-381864.3 mean_steps=14.2
|
|
[Episode 9250] reward=-62442372.1 actor_loss=0.1454 critic_loss=133590012359.1111 entropy=3.5817 ent_coef=0.001917 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Episode 9260] reward=-66064949.7 actor_loss=0.1227 critic_loss=136634647552.0000 entropy=3.5815 ent_coef=0.001917 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Eval 9260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484742.9 mean_steps=12.9
|
|
[Episode 9270] reward=-56100007.9 actor_loss=0.0983 critic_loss=123501302272.0000 entropy=3.5815 ent_coef=0.001917 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 9280] reward=-55799393.8 actor_loss=0.1239 critic_loss=126105015910.4000 entropy=3.5836 ent_coef=0.001916 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 9280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431829.2 mean_steps=13.3
|
|
[Episode 9290] reward=-61355821.1 actor_loss=0.0722 critic_loss=134143395430.4000 entropy=3.5827 ent_coef=0.001916 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 9300] reward=-57009487.3 actor_loss=0.1139 critic_loss=129300069229.7143 entropy=3.5825 ent_coef=0.001916 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 9300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-432460.0 mean_steps=13.9
|
|
[Episode 9310] reward=-43216401.8 actor_loss=0.1140 critic_loss=120170917888.0000 entropy=3.5844 ent_coef=0.001916 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 9320] reward=-56990339.8 actor_loss=0.1482 critic_loss=125166080585.1429 entropy=3.5832 ent_coef=0.001916 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 9320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-527223.3 mean_steps=12.4
|
|
[Episode 9330] reward=-57100107.9 actor_loss=0.1265 critic_loss=125042291507.2000 entropy=3.5837 ent_coef=0.001916 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 9340] reward=-62338286.3 actor_loss=0.1137 critic_loss=133728588721.2308 entropy=3.5834 ent_coef=0.001916 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 9340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-501960.2 mean_steps=12.1
|
|
[Episode 9350] reward=-48759979.8 actor_loss=0.0996 critic_loss=125339820032.0000 entropy=3.5827 ent_coef=0.001916 approx_kl=0.0005 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 9360] reward=-60922870.4 actor_loss=0.1226 critic_loss=130129758617.6000 entropy=3.5835 ent_coef=0.001916 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0983 front_blocked=0
|
|
[Eval 9360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-562589.1 mean_steps=11.5
|
|
[Episode 9370] reward=-62015111.6 actor_loss=0.1443 critic_loss=130771113574.4000 entropy=3.5845 ent_coef=0.001916 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Episode 9380] reward=-59618887.6 actor_loss=0.1279 critic_loss=131029626294.8571 entropy=3.5829 ent_coef=0.001916 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 9380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492584.9 mean_steps=12.9
|
|
[Episode 9390] reward=-62823438.8 actor_loss=0.0960 critic_loss=129297922457.6000 entropy=3.5840 ent_coef=0.001915 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 9400] reward=-37399419.9 actor_loss=0.0859 critic_loss=117279905499.4286 entropy=3.5794 ent_coef=0.001915 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Eval 9400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-411877.7 mean_steps=13.9
|
|
[Episode 9410] reward=-61480908.5 actor_loss=0.1119 critic_loss=126719555993.6000 entropy=3.5835 ent_coef=0.001915 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 9420] reward=-52127658.9 actor_loss=0.1136 critic_loss=129532320153.6000 entropy=3.5852 ent_coef=0.001915 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 9420] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-640346.4 mean_steps=11.6
|
|
[Episode 9430] reward=-55714573.8 actor_loss=0.1207 critic_loss=124754340522.6667 entropy=3.5861 ent_coef=0.001915 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 9440] reward=-52643823.8 actor_loss=0.1324 critic_loss=123347476480.0000 entropy=3.5846 ent_coef=0.001915 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 9440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-525916.5 mean_steps=12.4
|
|
[Episode 9450] reward=-46137703.8 actor_loss=0.1108 critic_loss=118930110464.0000 entropy=3.5838 ent_coef=0.001915 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 9460] reward=-66975262.0 actor_loss=0.1217 critic_loss=130689895330.9091 entropy=3.5794 ent_coef=0.001915 approx_kl=0.0005 kl_stop=1 intervention_rate=0.0957 front_blocked=0
|
|
[Eval 9460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-594875.2 mean_steps=11.8
|
|
[Episode 9470] reward=-51646079.0 actor_loss=0.1336 critic_loss=126255015058.2857 entropy=3.5785 ent_coef=0.001915 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 9480] reward=-53262360.2 actor_loss=0.1117 critic_loss=125359496471.2727 entropy=3.5762 ent_coef=0.001915 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 9480] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-636587.4 mean_steps=11.3
|
|
[Episode 9490] reward=-52111331.0 actor_loss=0.1046 critic_loss=123047132160.0000 entropy=3.5753 ent_coef=0.001915 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 9500] reward=-57790497.0 actor_loss=0.1350 critic_loss=128265668198.4000 entropy=3.5717 ent_coef=0.001915 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 9500] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-571602.1 mean_steps=12.0
|
|
[Episode 9510] reward=-45072523.8 actor_loss=0.1000 critic_loss=121653212160.0000 entropy=3.5669 ent_coef=0.001914 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 9520] reward=-50461527.9 actor_loss=0.1466 critic_loss=121027129344.0000 entropy=3.5718 ent_coef=0.001914 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 9520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-497500.6 mean_steps=11.8
|
|
[Episode 9530] reward=-60136631.8 actor_loss=0.1306 critic_loss=127230708004.5714 entropy=3.5711 ent_coef=0.001914 approx_kl=0.0029 kl_stop=1 intervention_rate=0.1048 front_blocked=0
|
|
[Episode 9540] reward=-56765703.9 actor_loss=0.1356 critic_loss=124962449408.0000 entropy=3.5712 ent_coef=0.001914 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 9540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526173.8 mean_steps=12.2
|
|
[Episode 9550] reward=-66471349.3 actor_loss=0.1378 critic_loss=132802052096.0000 entropy=3.5725 ent_coef=0.001914 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 9560] reward=-63211195.8 actor_loss=0.1157 critic_loss=135340785664.0000 entropy=3.5720 ent_coef=0.001914 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 9560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460956.3 mean_steps=13.3
|
|
[Episode 9570] reward=-60984071.9 actor_loss=0.1000 critic_loss=130147932842.6667 entropy=3.5725 ent_coef=0.001914 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 9580] reward=-57802922.9 actor_loss=0.0957 critic_loss=127002923827.2000 entropy=3.5724 ent_coef=0.001914 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 9580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-443436.1 mean_steps=12.8
|
|
[Episode 9590] reward=-57064210.6 actor_loss=0.1173 critic_loss=127089463296.0000 entropy=3.5730 ent_coef=0.001914 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 9600] reward=-54824816.2 actor_loss=0.1423 critic_loss=121495885141.3333 entropy=3.5734 ent_coef=0.001914 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Eval 9600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448198.4 mean_steps=13.4
|
|
[Episode 9610] reward=-43629978.3 actor_loss=0.0899 critic_loss=120741296915.6923 entropy=3.5708 ent_coef=0.001914 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 9620] reward=-57988188.9 actor_loss=0.1295 critic_loss=126120504320.0000 entropy=3.5678 ent_coef=0.001913 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 9620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501451.9 mean_steps=12.7
|
|
[Episode 9630] reward=-61728878.5 actor_loss=0.1174 critic_loss=133457907438.9333 entropy=3.5650 ent_coef=0.001913 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 9640] reward=-47178686.5 actor_loss=0.1096 critic_loss=120912239957.3333 entropy=3.5651 ent_coef=0.001913 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 9640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433860.1 mean_steps=13.2
|
|
[Episode 9650] reward=-52528987.0 actor_loss=0.1212 critic_loss=124405435596.8000 entropy=3.5644 ent_coef=0.001913 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 9660] reward=-47432117.4 actor_loss=0.1238 critic_loss=120574360917.3333 entropy=3.5639 ent_coef=0.001913 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 9660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-578741.0 mean_steps=12.8
|
|
[Episode 9670] reward=-56494184.0 actor_loss=0.1055 critic_loss=126526323565.7143 entropy=3.5644 ent_coef=0.001913 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 9680] reward=-54553933.7 actor_loss=0.1005 critic_loss=125663540565.3333 entropy=3.5654 ent_coef=0.001913 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 9680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-517513.9 mean_steps=12.2
|
|
[Episode 9690] reward=-49136617.5 actor_loss=0.1320 critic_loss=122114715648.0000 entropy=3.5658 ent_coef=0.001913 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 9700] reward=-49424682.2 actor_loss=0.1084 critic_loss=122870944116.3636 entropy=3.5660 ent_coef=0.001913 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 9700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-437319.2 mean_steps=12.3
|
|
[Episode 9710] reward=-55932922.0 actor_loss=0.1057 critic_loss=128320038229.3333 entropy=3.5657 ent_coef=0.001913 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 9720] reward=-58854809.2 actor_loss=0.1284 critic_loss=122808915285.3333 entropy=3.5664 ent_coef=0.001913 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 9720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-541503.2 mean_steps=13.0
|
|
[Episode 9730] reward=-57441993.5 actor_loss=0.0859 critic_loss=131224444108.8000 entropy=3.5647 ent_coef=0.001912 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 9740] reward=-49179707.7 actor_loss=0.1361 critic_loss=122977730560.0000 entropy=3.5601 ent_coef=0.001912 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 9740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-574304.9 mean_steps=11.2
|
|
[Episode 9750] reward=-43861089.3 actor_loss=0.1181 critic_loss=120418225590.8571 entropy=3.5608 ent_coef=0.001912 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 9760] reward=-47958344.3 actor_loss=0.0947 critic_loss=117979493990.4000 entropy=3.5592 ent_coef=0.001912 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 9760] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-615223.7 mean_steps=11.6
|
|
[Episode 9770] reward=-59473825.0 actor_loss=0.1297 critic_loss=126602976369.7778 entropy=3.5605 ent_coef=0.001912 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 9780] reward=-49545976.5 actor_loss=0.1011 critic_loss=126023614464.0000 entropy=3.5584 ent_coef=0.001912 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 9780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-538088.5 mean_steps=10.8
|
|
[Episode 9790] reward=-58391988.9 actor_loss=0.1142 critic_loss=125582253260.8000 entropy=3.5582 ent_coef=0.001912 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 9800] reward=-56506100.1 actor_loss=0.1277 critic_loss=127641164185.6000 entropy=3.5567 ent_coef=0.001912 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 9800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460668.5 mean_steps=13.7
|
|
[Episode 9810] reward=-51339851.4 actor_loss=0.1137 critic_loss=126041611468.8000 entropy=3.5563 ent_coef=0.001912 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 9820] reward=-52599798.0 actor_loss=0.1225 critic_loss=123476235264.0000 entropy=3.5538 ent_coef=0.001912 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 9820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-429399.1 mean_steps=13.2
|
|
[Episode 9830] reward=-63433767.7 actor_loss=0.0991 critic_loss=132562087936.0000 entropy=3.5568 ent_coef=0.001912 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 9840] reward=-61518892.0 actor_loss=0.1374 critic_loss=132060358948.5714 entropy=3.5546 ent_coef=0.001911 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 9840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-313373.3 mean_steps=14.0
|
|
[Episode 9850] reward=-52469413.4 actor_loss=0.1265 critic_loss=123347704490.6667 entropy=3.5548 ent_coef=0.001911 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 9860] reward=-43258979.9 actor_loss=0.1188 critic_loss=118881635532.8000 entropy=3.5515 ent_coef=0.001911 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 9860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-449690.1 mean_steps=13.4
|
|
[Episode 9870] reward=-54583240.9 actor_loss=0.1159 critic_loss=127060725387.6364 entropy=3.5515 ent_coef=0.001911 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 9880] reward=-44363119.2 actor_loss=0.1075 critic_loss=119234745139.2000 entropy=3.5512 ent_coef=0.001911 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 9880] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-492455.3 mean_steps=12.0
|
|
[Episode 9890] reward=-63987660.3 actor_loss=0.1262 critic_loss=131754487352.8889 entropy=3.5493 ent_coef=0.001911 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 9900] reward=-52428820.7 actor_loss=0.1079 critic_loss=124202011852.8000 entropy=3.5497 ent_coef=0.001911 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 9900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492062.8 mean_steps=13.7
|
|
[Episode 9910] reward=-71811058.3 actor_loss=0.1341 critic_loss=136301888853.3333 entropy=3.5517 ent_coef=0.001911 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0957 front_blocked=0
|
|
[Episode 9920] reward=-52768658.4 actor_loss=0.1151 critic_loss=125372194816.0000 entropy=3.5481 ent_coef=0.001911 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 9920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-507031.4 mean_steps=12.8
|
|
[Episode 9930] reward=-52443998.0 actor_loss=0.1168 critic_loss=125682988869.8182 entropy=3.5489 ent_coef=0.001911 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 9940] reward=-55065870.9 actor_loss=0.1045 critic_loss=129278500864.0000 entropy=3.5500 ent_coef=0.001911 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 9940] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-354959.8 mean_steps=14.1
|
|
[Episode 9950] reward=-41378992.8 actor_loss=0.0898 critic_loss=121516254354.2857 entropy=3.5511 ent_coef=0.001910 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 9960] reward=-52563201.7 actor_loss=0.1126 critic_loss=124276433481.1429 entropy=3.5496 ent_coef=0.001910 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 9960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-516830.3 mean_steps=12.1
|
|
[Episode 9970] reward=-60812974.4 actor_loss=0.1305 critic_loss=123740359338.6667 entropy=3.5503 ent_coef=0.001910 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Episode 9980] reward=-63975886.3 actor_loss=0.1259 critic_loss=130158777685.3333 entropy=3.5489 ent_coef=0.001910 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 9980] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-610834.5 mean_steps=11.7
|
|
[Episode 9990] reward=-53340080.3 actor_loss=0.1102 critic_loss=123417257756.4444 entropy=3.5483 ent_coef=0.001910 approx_kl=0.0005 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 10000] reward=-59677653.6 actor_loss=0.1266 critic_loss=125601921251.5556 entropy=3.5470 ent_coef=0.001910 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 10000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434625.9 mean_steps=13.8
|
|
[Episode 10010] reward=-56736719.3 actor_loss=0.1363 critic_loss=128873191237.8182 entropy=3.5450 ent_coef=0.001910 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 10020] reward=-55963863.8 actor_loss=0.1296 critic_loss=124298866232.8889 entropy=3.5418 ent_coef=0.001910 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Eval 10020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-542606.6 mean_steps=12.9
|
|
[Episode 10030] reward=-58210294.5 actor_loss=0.1027 critic_loss=125811134919.1111 entropy=3.5392 ent_coef=0.001910 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 10040] reward=-54787502.9 actor_loss=0.1137 critic_loss=126445360859.4286 entropy=3.5383 ent_coef=0.001910 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 10040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413486.1 mean_steps=13.8
|
|
[Episode 10050] reward=-61548790.9 actor_loss=0.1490 critic_loss=128274165760.0000 entropy=3.5391 ent_coef=0.001910 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Episode 10060] reward=-44325762.1 actor_loss=0.1227 critic_loss=119160125440.0000 entropy=3.5373 ent_coef=0.001909 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 10060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-601103.2 mean_steps=11.3
|
|
[Episode 10070] reward=-57133803.4 actor_loss=0.1163 critic_loss=124889743360.0000 entropy=3.5384 ent_coef=0.001909 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 10080] reward=-58030064.8 actor_loss=0.1510 critic_loss=128606779099.4286 entropy=3.5375 ent_coef=0.001909 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0957 front_blocked=0
|
|
[Eval 10080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-351036.9 mean_steps=14.2
|
|
[Episode 10090] reward=-56558347.8 actor_loss=0.1216 critic_loss=126897829205.3333 entropy=3.5367 ent_coef=0.001909 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 10100] reward=-57516172.7 actor_loss=0.1408 critic_loss=129266297241.6000 entropy=3.5362 ent_coef=0.001909 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 10100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-484748.2 mean_steps=12.8
|
|
[Episode 10110] reward=-64636358.4 actor_loss=0.1053 critic_loss=132325743616.0000 entropy=3.5368 ent_coef=0.001909 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 10120] reward=-47423798.1 actor_loss=0.1407 critic_loss=122495600412.4444 entropy=3.5379 ent_coef=0.001909 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 10120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542108.1 mean_steps=12.6
|
|
[Episode 10130] reward=-49660737.9 actor_loss=0.0786 critic_loss=125775372288.0000 entropy=3.5345 ent_coef=0.001909 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 10140] reward=-49158308.2 actor_loss=0.1154 critic_loss=126244382037.3333 entropy=3.5318 ent_coef=0.001909 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 10140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-454342.5 mean_steps=13.4
|
|
[Episode 10150] reward=-42703911.1 actor_loss=0.1192 critic_loss=119357620224.0000 entropy=3.5319 ent_coef=0.001909 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 10160] reward=-35863698.7 actor_loss=0.1043 critic_loss=112992812311.2727 entropy=3.5322 ent_coef=0.001909 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 10160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-538141.9 mean_steps=13.1
|
|
[Episode 10170] reward=-58389842.2 actor_loss=0.0910 critic_loss=128451193304.6154 entropy=3.5313 ent_coef=0.001908 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 10180] reward=-57095472.4 actor_loss=0.0873 critic_loss=126521651572.3636 entropy=3.5278 ent_coef=0.001908 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 10180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458006.5 mean_steps=13.6
|
|
[Episode 10190] reward=-51239498.7 actor_loss=0.1514 critic_loss=121921495040.0000 entropy=3.5285 ent_coef=0.001908 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 10200] reward=-71700846.4 actor_loss=0.1384 critic_loss=137602298228.3636 entropy=3.5255 ent_coef=0.001908 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 10200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-494323.4 mean_steps=12.8
|
|
[Episode 10210] reward=-57065584.2 actor_loss=0.1065 critic_loss=130407412531.2000 entropy=3.5277 ent_coef=0.001908 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 10220] reward=-56918603.8 actor_loss=0.1654 critic_loss=125849706496.0000 entropy=3.5277 ent_coef=0.001908 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Eval 10220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553430.1 mean_steps=12.6
|
|
[Episode 10230] reward=-58360855.3 actor_loss=0.1361 critic_loss=126429285814.8571 entropy=3.5268 ent_coef=0.001908 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 10240] reward=-54140839.2 actor_loss=0.1141 critic_loss=123550253056.0000 entropy=3.5281 ent_coef=0.001908 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 10240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533276.3 mean_steps=13.2
|
|
[Episode 10250] reward=-57810292.4 actor_loss=0.1489 critic_loss=127505299046.4000 entropy=3.5224 ent_coef=0.001908 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 10260] reward=-73092394.7 actor_loss=0.1303 critic_loss=134787137536.0000 entropy=3.5210 ent_coef=0.001908 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Eval 10260] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-517725.2 mean_steps=12.3
|
|
[Episode 10270] reward=-51177539.1 actor_loss=0.1057 critic_loss=124748617435.4286 entropy=3.5188 ent_coef=0.001908 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 10280] reward=-58000777.9 actor_loss=0.1188 critic_loss=128531657523.2000 entropy=3.5187 ent_coef=0.001907 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 10280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-427229.0 mean_steps=13.2
|
|
[Episode 10290] reward=-53068903.9 actor_loss=0.1073 critic_loss=122713176064.0000 entropy=3.5203 ent_coef=0.001907 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 10300] reward=-55311156.6 actor_loss=0.1370 critic_loss=122946938197.3333 entropy=3.5169 ent_coef=0.001907 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 10300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-509411.6 mean_steps=13.6
|
|
[Episode 10310] reward=-48306599.1 actor_loss=0.1253 critic_loss=120584676352.0000 entropy=3.5148 ent_coef=0.001907 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 10320] reward=-51036325.3 actor_loss=0.1095 critic_loss=120066655573.3333 entropy=3.5142 ent_coef=0.001907 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 10320] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-256992.6 mean_steps=15.2
|
|
[Episode 10330] reward=-59295996.3 actor_loss=0.0976 critic_loss=127523757533.8667 entropy=3.5113 ent_coef=0.001907 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 10340] reward=-47263762.3 actor_loss=0.1016 critic_loss=124612677176.8889 entropy=3.5113 ent_coef=0.001907 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 10340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-472810.5 mean_steps=12.1
|
|
[Episode 10350] reward=-56245516.0 actor_loss=0.0859 critic_loss=129670961152.0000 entropy=3.5116 ent_coef=0.001907 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 10360] reward=-58450603.6 actor_loss=0.1450 critic_loss=127924324194.4615 entropy=3.5090 ent_coef=0.001907 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 10360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-368429.2 mean_steps=14.2
|
|
[Episode 10370] reward=-53736170.0 actor_loss=0.1002 critic_loss=124679281049.6000 entropy=3.5081 ent_coef=0.001907 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 10380] reward=-60952872.8 actor_loss=0.1003 critic_loss=133366200183.4667 entropy=3.5058 ent_coef=0.001907 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 10380] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-258611.3 mean_steps=15.2
|
|
[Episode 10390] reward=-53259064.5 actor_loss=0.1181 critic_loss=121537044480.0000 entropy=3.5033 ent_coef=0.001906 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 10400] reward=-52719804.8 actor_loss=0.1244 critic_loss=124417190570.6667 entropy=3.5046 ent_coef=0.001906 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 10400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-465311.8 mean_steps=13.6
|
|
[Episode 10410] reward=-62351183.9 actor_loss=0.1618 critic_loss=127242672686.5455 entropy=3.5002 ent_coef=0.001906 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0983 front_blocked=0
|
|
[Episode 10420] reward=-54540547.6 actor_loss=0.1185 critic_loss=130241806336.0000 entropy=3.4955 ent_coef=0.001906 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 10420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-538962.3 mean_steps=12.4
|
|
[Episode 10430] reward=-48955039.2 actor_loss=0.0988 critic_loss=120567128064.0000 entropy=3.4972 ent_coef=0.001906 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 10440] reward=-46042757.5 actor_loss=0.0732 critic_loss=122361366118.4000 entropy=3.4962 ent_coef=0.001906 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 10440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575844.5 mean_steps=11.9
|
|
[Episode 10450] reward=-62537225.0 actor_loss=0.1250 critic_loss=133416878080.0000 entropy=3.4923 ent_coef=0.001906 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 10460] reward=-57372064.7 actor_loss=0.1730 critic_loss=128566845440.0000 entropy=3.4919 ent_coef=0.001906 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 10460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-412166.3 mean_steps=13.8
|
|
[Episode 10470] reward=-52818570.8 actor_loss=0.1054 critic_loss=127494954097.7778 entropy=3.4880 ent_coef=0.001906 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 10480] reward=-61418625.2 actor_loss=0.1409 critic_loss=128208801792.0000 entropy=3.4872 ent_coef=0.001906 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 10480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-542624.1 mean_steps=12.6
|
|
[Episode 10490] reward=-54917112.2 actor_loss=0.1008 critic_loss=126347476992.0000 entropy=3.4841 ent_coef=0.001906 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 10500] reward=-51054834.2 actor_loss=0.0878 critic_loss=122438553014.8571 entropy=3.4845 ent_coef=0.001906 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 10500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-406375.4 mean_steps=13.2
|
|
[Episode 10510] reward=-62942819.9 actor_loss=0.1119 critic_loss=128226578432.0000 entropy=3.4845 ent_coef=0.001905 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 10520] reward=-56523856.6 actor_loss=0.1026 critic_loss=129069909138.2857 entropy=3.4858 ent_coef=0.001905 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 10520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469685.4 mean_steps=13.5
|
|
[Episode 10530] reward=-38111637.4 actor_loss=0.0834 critic_loss=116504187997.0909 entropy=3.4886 ent_coef=0.001905 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 10540] reward=-52599873.7 actor_loss=0.1196 critic_loss=122055271219.2000 entropy=3.4888 ent_coef=0.001905 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 10540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498901.8 mean_steps=12.9
|
|
[Episode 10550] reward=-49616147.7 actor_loss=0.1209 critic_loss=118769600512.0000 entropy=3.4878 ent_coef=0.001905 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 10560] reward=-50400163.8 actor_loss=0.0901 critic_loss=126577860608.0000 entropy=3.4882 ent_coef=0.001905 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 10560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479795.1 mean_steps=12.8
|
|
[Episode 10570] reward=-56433203.2 actor_loss=0.1025 critic_loss=125185221778.2857 entropy=3.4851 ent_coef=0.001905 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 10580] reward=-65108400.7 actor_loss=0.1274 critic_loss=134618611712.0000 entropy=3.4833 ent_coef=0.001905 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 10580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548685.4 mean_steps=12.3
|
|
[Episode 10590] reward=-51916073.8 actor_loss=0.1064 critic_loss=124908891704.8889 entropy=3.4821 ent_coef=0.001905 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 10600] reward=-49909482.9 actor_loss=0.1477 critic_loss=119910462385.2308 entropy=3.4837 ent_coef=0.001905 approx_kl=0.0004 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 10600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515966.6 mean_steps=12.9
|
|
[Episode 10610] reward=-64532411.4 actor_loss=0.1448 critic_loss=130802924078.5455 entropy=3.4807 ent_coef=0.001905 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0990 front_blocked=0
|
|
[Episode 10620] reward=-56999131.9 actor_loss=0.1046 critic_loss=129349210484.3636 entropy=3.4808 ent_coef=0.001904 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 10620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-406220.7 mean_steps=13.6
|
|
[Episode 10630] reward=-59051189.5 actor_loss=0.1345 critic_loss=131913614336.0000 entropy=3.4823 ent_coef=0.001904 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 10640] reward=-56771341.1 actor_loss=0.1214 critic_loss=126655027492.5714 entropy=3.4817 ent_coef=0.001904 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 10640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-510181.6 mean_steps=13.8
|
|
[Episode 10650] reward=-58084658.1 actor_loss=0.1132 critic_loss=130151767244.8000 entropy=3.4805 ent_coef=0.001904 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 10660] reward=-49425288.3 actor_loss=0.0960 critic_loss=126733256890.1818 entropy=3.4808 ent_coef=0.001904 approx_kl=-0.0007 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 10660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495741.4 mean_steps=12.8
|
|
[Episode 10670] reward=-54310584.1 actor_loss=0.1050 critic_loss=125341727597.7143 entropy=3.4820 ent_coef=0.001904 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 10680] reward=-45206196.9 actor_loss=0.0895 critic_loss=120520809585.7778 entropy=3.4817 ent_coef=0.001904 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 10680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-468341.0 mean_steps=14.3
|
|
[Episode 10690] reward=-49756507.3 actor_loss=0.1234 critic_loss=125364395008.0000 entropy=3.4818 ent_coef=0.001904 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 10700] reward=-58270561.5 actor_loss=0.0993 critic_loss=126667410090.6667 entropy=3.4827 ent_coef=0.001904 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 10700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-552517.1 mean_steps=11.7
|
|
[Episode 10710] reward=-57038431.0 actor_loss=0.1381 critic_loss=127296944537.6000 entropy=3.4853 ent_coef=0.001904 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Episode 10720] reward=-48708619.4 actor_loss=0.1024 critic_loss=123643816813.7143 entropy=3.4854 ent_coef=0.001904 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 10720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475510.1 mean_steps=13.5
|
|
[Episode 10730] reward=-51061559.2 actor_loss=0.0980 critic_loss=125548599854.5455 entropy=3.4842 ent_coef=0.001903 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 10740] reward=-52490681.4 actor_loss=0.1169 critic_loss=122083337011.2000 entropy=3.4800 ent_coef=0.001903 approx_kl=0.0008 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 10740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435049.7 mean_steps=13.2
|
|
[Episode 10750] reward=-54010437.7 actor_loss=0.1376 critic_loss=123322611419.4286 entropy=3.4752 ent_coef=0.001903 approx_kl=-0.0002 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Episode 10760] reward=-52315669.8 actor_loss=0.1074 critic_loss=126257449252.5714 entropy=3.4734 ent_coef=0.001903 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 10760] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-668320.5 mean_steps=10.6
|
|
[Episode 10770] reward=-51832797.3 actor_loss=0.1046 critic_loss=123831357124.9231 entropy=3.4736 ent_coef=0.001903 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 10780] reward=-55513207.4 actor_loss=0.1467 critic_loss=128347959881.1429 entropy=3.4718 ent_coef=0.001903 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 10780] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-421680.5 mean_steps=14.4
|
|
[Episode 10790] reward=-58734924.9 actor_loss=0.1010 critic_loss=128778461928.7273 entropy=3.4701 ent_coef=0.001903 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 10800] reward=-54525498.7 actor_loss=0.1208 critic_loss=122001006592.0000 entropy=3.4696 ent_coef=0.001903 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 10800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-370966.0 mean_steps=13.8
|
|
[Episode 10810] reward=-51374745.7 actor_loss=0.1053 critic_loss=122422194491.0769 entropy=3.4667 ent_coef=0.001903 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 10820] reward=-58795325.9 actor_loss=0.1295 critic_loss=130588481243.4286 entropy=3.4608 ent_coef=0.001903 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 10820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-609936.1 mean_steps=12.1
|
|
[Episode 10830] reward=-52667179.5 actor_loss=0.1123 critic_loss=127215570571.6364 entropy=3.4608 ent_coef=0.001903 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 10840] reward=-59205904.2 actor_loss=0.1283 critic_loss=124124923676.4444 entropy=3.4573 ent_coef=0.001902 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 10840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-432403.8 mean_steps=13.3
|
|
[Episode 10850] reward=-57854625.3 actor_loss=0.0906 critic_loss=124487573504.0000 entropy=3.4575 ent_coef=0.001902 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 10860] reward=-51919459.5 actor_loss=0.1280 critic_loss=121963959091.2000 entropy=3.4565 ent_coef=0.001902 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Eval 10860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-551566.9 mean_steps=11.8
|
|
[Episode 10870] reward=-57721465.0 actor_loss=0.1397 critic_loss=125498162289.7778 entropy=3.4558 ent_coef=0.001902 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 10880] reward=-56838954.3 actor_loss=0.1045 critic_loss=128846166574.5455 entropy=3.4568 ent_coef=0.001902 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 10880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-378710.8 mean_steps=13.4
|
|
[Episode 10890] reward=-46532829.8 actor_loss=0.0997 critic_loss=119039597102.5455 entropy=3.4536 ent_coef=0.001902 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 10900] reward=-54504455.6 actor_loss=0.0985 critic_loss=124036839833.6000 entropy=3.4525 ent_coef=0.001902 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 10900] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-411359.5 mean_steps=14.6
|
|
[Episode 10910] reward=-50235714.0 actor_loss=0.0939 critic_loss=121415881159.1111 entropy=3.4506 ent_coef=0.001902 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 10920] reward=-61729974.7 actor_loss=0.1374 critic_loss=126131456930.9091 entropy=3.4481 ent_coef=0.001902 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Eval 10920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-607502.8 mean_steps=12.2
|
|
[Episode 10930] reward=-54723720.7 actor_loss=0.1208 critic_loss=126551450965.3333 entropy=3.4468 ent_coef=0.001902 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 10940] reward=-56435128.9 actor_loss=0.1010 critic_loss=119840688128.0000 entropy=3.4483 ent_coef=0.001902 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 10940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-431864.8 mean_steps=13.2
|
|
[Episode 10950] reward=-55659302.4 actor_loss=0.1146 critic_loss=125450057955.5556 entropy=3.4495 ent_coef=0.001901 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 10960] reward=-65718820.4 actor_loss=0.1257 critic_loss=131226791563.6364 entropy=3.4510 ent_coef=0.001901 approx_kl=0.0007 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Eval 10960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-548689.3 mean_steps=12.4
|
|
[Episode 10970] reward=-63913732.2 actor_loss=0.1366 critic_loss=130038390784.0000 entropy=3.4490 ent_coef=0.001901 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 10980] reward=-54531023.6 actor_loss=0.0902 critic_loss=125572051529.1429 entropy=3.4502 ent_coef=0.001901 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 10980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-582750.8 mean_steps=11.2
|
|
[Episode 10990] reward=-58243774.1 actor_loss=0.1382 critic_loss=130979146020.5714 entropy=3.4498 ent_coef=0.001901 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 11000] reward=-49665773.5 actor_loss=0.1658 critic_loss=121755636829.0909 entropy=3.4484 ent_coef=0.001901 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 11000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-602571.3 mean_steps=12.3
|
|
[Episode 11010] reward=-49038918.1 actor_loss=0.1076 critic_loss=122635684864.0000 entropy=3.4470 ent_coef=0.001901 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 11020] reward=-55840856.9 actor_loss=0.1327 critic_loss=126019049881.6000 entropy=3.4474 ent_coef=0.001901 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 11020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492640.3 mean_steps=12.8
|
|
[Episode 11030] reward=-49621550.1 actor_loss=0.1141 critic_loss=123394194711.2727 entropy=3.4474 ent_coef=0.001901 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 11040] reward=-45762371.0 actor_loss=0.1103 critic_loss=118447036643.5556 entropy=3.4452 ent_coef=0.001901 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 11040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501185.8 mean_steps=13.3
|
|
[Episode 11050] reward=-63191476.6 actor_loss=0.1222 critic_loss=130321001130.6667 entropy=3.4462 ent_coef=0.001901 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 11060] reward=-57457142.5 actor_loss=0.1359 critic_loss=126993737045.3333 entropy=3.4445 ent_coef=0.001900 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 11060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-556399.3 mean_steps=11.8
|
|
[Episode 11070] reward=-54101363.4 actor_loss=0.1255 critic_loss=126688811008.0000 entropy=3.4447 ent_coef=0.001900 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 11080] reward=-64046748.8 actor_loss=0.1303 critic_loss=130032985702.4000 entropy=3.4467 ent_coef=0.001900 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Eval 11080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479216.3 mean_steps=13.8
|
|
[Episode 11090] reward=-60120984.0 actor_loss=0.1346 critic_loss=127483330560.0000 entropy=3.4443 ent_coef=0.001900 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 11100] reward=-45945309.5 actor_loss=0.0866 critic_loss=123201142784.0000 entropy=3.4440 ent_coef=0.001900 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Eval 11100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-485156.9 mean_steps=13.8
|
|
[Episode 11110] reward=-53353954.4 actor_loss=0.0815 critic_loss=125182101504.0000 entropy=3.4422 ent_coef=0.001900 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 11120] reward=-55985892.9 actor_loss=0.1352 critic_loss=126302045525.3333 entropy=3.4401 ent_coef=0.001900 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 11120] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-635317.3 mean_steps=11.5
|
|
[Episode 11130] reward=-61090666.0 actor_loss=0.1141 critic_loss=129184611441.7778 entropy=3.4398 ent_coef=0.001900 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Episode 11140] reward=-60109250.2 actor_loss=0.1075 critic_loss=128530042197.3333 entropy=3.4412 ent_coef=0.001900 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Eval 11140] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-575439.0 mean_steps=11.2
|
|
[Episode 11150] reward=-60871631.1 actor_loss=0.1504 critic_loss=128692809728.0000 entropy=3.4434 ent_coef=0.001900 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 11160] reward=-58658814.3 actor_loss=0.0999 critic_loss=126748364800.0000 entropy=3.4410 ent_coef=0.001900 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 11160] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-426700.5 mean_steps=13.3
|
|
[Episode 11170] reward=-56080047.9 actor_loss=0.1125 critic_loss=125602767667.2000 entropy=3.4425 ent_coef=0.001899 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 11180] reward=-57577140.2 actor_loss=0.1139 critic_loss=129129669778.2857 entropy=3.4468 ent_coef=0.001899 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 11180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-379297.6 mean_steps=12.8
|
|
[Episode 11190] reward=-66028172.2 actor_loss=0.1367 critic_loss=135107835221.3333 entropy=3.4452 ent_coef=0.001899 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 11200] reward=-58620395.6 actor_loss=0.1245 critic_loss=125683816675.5556 entropy=3.4471 ent_coef=0.001899 approx_kl=0.0006 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 11200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-529709.7 mean_steps=12.4
|
|
[Episode 11210] reward=-64733173.3 actor_loss=0.1091 critic_loss=131312604501.3333 entropy=3.4470 ent_coef=0.001899 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Episode 11220] reward=-61078297.7 actor_loss=0.1119 critic_loss=127527002794.6667 entropy=3.4456 ent_coef=0.001899 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 11220] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-358871.2 mean_steps=15.4
|
|
[Episode 11230] reward=-60705984.3 actor_loss=0.1312 critic_loss=125747576832.0000 entropy=3.4458 ent_coef=0.001899 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 11240] reward=-56127441.5 actor_loss=0.1555 critic_loss=126641962097.7778 entropy=3.4472 ent_coef=0.001899 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 11240] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-406352.4 mean_steps=14.1
|
|
[Episode 11250] reward=-54195069.3 actor_loss=0.1277 critic_loss=124532344422.4000 entropy=3.4465 ent_coef=0.001899 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 11260] reward=-53643364.0 actor_loss=0.1163 critic_loss=124178815658.6667 entropy=3.4489 ent_coef=0.001899 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 11260] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-614111.2 mean_steps=12.2
|
|
[Episode 11270] reward=-58387917.8 actor_loss=0.1384 critic_loss=133001039872.0000 entropy=3.4484 ent_coef=0.001899 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 11280] reward=-42599528.8 actor_loss=0.1010 critic_loss=117717135360.0000 entropy=3.4492 ent_coef=0.001898 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 11280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-367637.3 mean_steps=13.9
|
|
[Episode 11290] reward=-66770727.0 actor_loss=0.1311 critic_loss=133113204736.0000 entropy=3.4479 ent_coef=0.001898 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 11300] reward=-57201758.1 actor_loss=0.1415 critic_loss=129833907814.4000 entropy=3.4484 ent_coef=0.001898 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 11300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-444364.0 mean_steps=13.7
|
|
[Episode 11310] reward=-50164685.5 actor_loss=0.0835 critic_loss=119707611721.1429 entropy=3.4508 ent_coef=0.001898 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 11320] reward=-54507342.2 actor_loss=0.1216 critic_loss=126158940160.0000 entropy=3.4465 ent_coef=0.001898 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 11320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-366566.3 mean_steps=14.6
|
|
[Episode 11330] reward=-57671995.4 actor_loss=0.1073 critic_loss=124478670848.0000 entropy=3.4458 ent_coef=0.001898 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 11340] reward=-53119090.3 actor_loss=0.1159 critic_loss=126369007206.4000 entropy=3.4462 ent_coef=0.001898 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 11340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-349073.6 mean_steps=13.8
|
|
[Episode 11350] reward=-51172885.6 actor_loss=0.0887 critic_loss=126446767672.8889 entropy=3.4471 ent_coef=0.001898 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 11360] reward=-55809516.0 actor_loss=0.1292 critic_loss=126759213056.0000 entropy=3.4477 ent_coef=0.001898 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 11360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-514101.6 mean_steps=12.3
|
|
[Episode 11370] reward=-53235018.0 actor_loss=0.1098 critic_loss=125601516544.0000 entropy=3.4438 ent_coef=0.001898 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 11380] reward=-52046982.0 actor_loss=0.1433 critic_loss=127035628544.0000 entropy=3.4424 ent_coef=0.001898 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 11380] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-649371.0 mean_steps=11.6
|
|
[Episode 11390] reward=-44931935.7 actor_loss=0.0983 critic_loss=118123477219.5556 entropy=3.4432 ent_coef=0.001897 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 11400] reward=-63688890.0 actor_loss=0.1044 critic_loss=132767973376.0000 entropy=3.4429 ent_coef=0.001897 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 11400] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-260713.8 mean_steps=15.7
|
|
[Episode 11410] reward=-63144757.0 actor_loss=0.1025 critic_loss=127862694912.0000 entropy=3.4443 ent_coef=0.001897 approx_kl=0.0010 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 11420] reward=-54862831.1 actor_loss=0.1158 critic_loss=125264810569.1429 entropy=3.4452 ent_coef=0.001897 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 11420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-447250.0 mean_steps=14.2
|
|
[Episode 11430] reward=-63137769.4 actor_loss=0.1331 critic_loss=130780761088.0000 entropy=3.4450 ent_coef=0.001897 approx_kl=0.0010 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 11440] reward=-64182902.0 actor_loss=0.1083 critic_loss=127625138548.3636 entropy=3.4451 ent_coef=0.001897 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Eval 11440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455184.0 mean_steps=13.3
|
|
[Episode 11450] reward=-58740281.9 actor_loss=0.1173 critic_loss=127090544054.8571 entropy=3.4422 ent_coef=0.001897 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 11460] reward=-61510691.3 actor_loss=0.1176 critic_loss=128647733930.6667 entropy=3.4407 ent_coef=0.001897 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 11460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490114.5 mean_steps=12.7
|
|
[Episode 11470] reward=-44871992.8 actor_loss=0.1264 critic_loss=118580871168.0000 entropy=3.4440 ent_coef=0.001897 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 11480] reward=-53989207.3 actor_loss=0.1609 critic_loss=125269496832.0000 entropy=3.4475 ent_coef=0.001897 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 11480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483062.8 mean_steps=12.7
|
|
[Episode 11490] reward=-53943302.4 actor_loss=0.0891 critic_loss=128843552768.0000 entropy=3.4488 ent_coef=0.001897 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 11500] reward=-53455123.4 actor_loss=0.1026 critic_loss=124569749504.0000 entropy=3.4507 ent_coef=0.001897 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 11500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451565.7 mean_steps=13.8
|
|
[Episode 11510] reward=-49379770.8 actor_loss=0.1192 critic_loss=124999717449.1429 entropy=3.4540 ent_coef=0.001896 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 11520] reward=-48304436.8 actor_loss=0.1186 critic_loss=119813272371.2000 entropy=3.4546 ent_coef=0.001896 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 11520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524334.9 mean_steps=12.5
|
|
[Episode 11530] reward=-65820135.1 actor_loss=0.1453 critic_loss=130733948928.0000 entropy=3.4550 ent_coef=0.001896 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 11540] reward=-51904160.7 actor_loss=0.1316 critic_loss=121369221997.7143 entropy=3.4548 ent_coef=0.001896 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 11540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-394037.6 mean_steps=14.9
|
|
[Episode 11550] reward=-53493210.4 actor_loss=0.1179 critic_loss=121684694747.4286 entropy=3.4584 ent_coef=0.001896 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 11560] reward=-53242349.2 actor_loss=0.1117 critic_loss=123786126950.4000 entropy=3.4580 ent_coef=0.001896 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 11560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-420635.5 mean_steps=14.1
|
|
[Episode 11570] reward=-42185613.1 actor_loss=0.0959 critic_loss=117362196480.0000 entropy=3.4571 ent_coef=0.001896 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 11580] reward=-65933815.6 actor_loss=0.1142 critic_loss=130315707733.3333 entropy=3.4564 ent_coef=0.001896 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 11580] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502294.0 mean_steps=13.8
|
|
[Episode 11590] reward=-54529563.1 actor_loss=0.1231 critic_loss=125085629440.0000 entropy=3.4558 ent_coef=0.001896 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 11600] reward=-53940721.8 actor_loss=0.1198 critic_loss=122071961600.0000 entropy=3.4556 ent_coef=0.001896 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 11600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441260.0 mean_steps=13.5
|
|
[Episode 11610] reward=-59625267.6 actor_loss=0.1225 critic_loss=127683965747.2000 entropy=3.4569 ent_coef=0.001896 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 11620] reward=-55378899.8 actor_loss=0.1121 critic_loss=126994027861.3333 entropy=3.4600 ent_coef=0.001895 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 11620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499244.8 mean_steps=13.2
|
|
[Episode 11630] reward=-48375440.1 actor_loss=0.0872 critic_loss=120943255552.0000 entropy=3.4593 ent_coef=0.001895 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 11640] reward=-49656712.1 actor_loss=0.0841 critic_loss=122364518400.0000 entropy=3.4591 ent_coef=0.001895 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 11640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486713.5 mean_steps=13.8
|
|
[Episode 11650] reward=-45077604.8 actor_loss=0.1200 critic_loss=119099542186.6667 entropy=3.4606 ent_coef=0.001895 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 11660] reward=-43855954.4 actor_loss=0.0604 critic_loss=119098599424.0000 entropy=3.4588 ent_coef=0.001895 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 11660] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-357405.6 mean_steps=15.3
|
|
[Episode 11670] reward=-46348976.6 actor_loss=0.1154 critic_loss=118046964394.6667 entropy=3.4602 ent_coef=0.001895 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 11680] reward=-61976627.8 actor_loss=0.1344 critic_loss=131615586011.4286 entropy=3.4619 ent_coef=0.001895 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 11680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-495918.9 mean_steps=13.7
|
|
[Episode 11690] reward=-61668972.8 actor_loss=0.1081 critic_loss=128302427340.8000 entropy=3.4623 ent_coef=0.001895 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 11700] reward=-49525390.7 actor_loss=0.1136 critic_loss=123427162794.6667 entropy=3.4609 ent_coef=0.001895 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 11700] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-635380.1 mean_steps=11.0
|
|
[Episode 11710] reward=-44706655.6 actor_loss=0.1303 critic_loss=119365408475.4286 entropy=3.4584 ent_coef=0.001895 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 11720] reward=-40590544.8 actor_loss=0.1082 critic_loss=118192663552.0000 entropy=3.4590 ent_coef=0.001895 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Eval 11720] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-564342.5 mean_steps=11.0
|
|
[Episode 11730] reward=-60359134.0 actor_loss=0.1449 critic_loss=128894386176.0000 entropy=3.4572 ent_coef=0.001894 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Episode 11740] reward=-63278032.8 actor_loss=0.1383 critic_loss=130816773688.8889 entropy=3.4547 ent_coef=0.001894 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 11740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-567060.4 mean_steps=12.2
|
|
[Episode 11750] reward=-43049482.8 actor_loss=0.1230 critic_loss=118757880627.2000 entropy=3.4535 ent_coef=0.001894 approx_kl=-0.0004 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 11760] reward=-60184019.0 actor_loss=0.0947 critic_loss=131791562752.0000 entropy=3.4554 ent_coef=0.001894 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 11760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-456540.4 mean_steps=12.8
|
|
[Episode 11770] reward=-47616249.0 actor_loss=0.1050 critic_loss=122878365110.8571 entropy=3.4557 ent_coef=0.001894 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 11780] reward=-60116856.9 actor_loss=0.1125 critic_loss=125371662336.0000 entropy=3.4581 ent_coef=0.001894 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 11780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441584.2 mean_steps=13.6
|
|
[Episode 11790] reward=-49232951.9 actor_loss=0.1026 critic_loss=119463178240.0000 entropy=3.4560 ent_coef=0.001894 approx_kl=0.0008 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 11800] reward=-67080627.3 actor_loss=0.1175 critic_loss=137589044317.0909 entropy=3.4538 ent_coef=0.001894 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0977 front_blocked=0
|
|
[Eval 11800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550199.0 mean_steps=12.7
|
|
[Episode 11810] reward=-58771317.1 actor_loss=0.1109 critic_loss=124355534848.0000 entropy=3.4566 ent_coef=0.001894 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 11820] reward=-60840854.3 actor_loss=0.1456 critic_loss=127057862656.0000 entropy=3.4534 ent_coef=0.001894 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0957 front_blocked=0
|
|
[Eval 11820] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-594454.6 mean_steps=11.2
|
|
[Episode 11830] reward=-48101497.1 actor_loss=0.1094 critic_loss=123446239232.0000 entropy=3.4549 ent_coef=0.001894 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 11840] reward=-51239178.8 actor_loss=0.1553 critic_loss=121740720537.6000 entropy=3.4537 ent_coef=0.001893 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 11840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-594994.3 mean_steps=11.0
|
|
[Episode 11850] reward=-52107696.8 actor_loss=0.1245 critic_loss=121968189440.0000 entropy=3.4530 ent_coef=0.001893 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 11860] reward=-55960590.2 actor_loss=0.1219 critic_loss=129043176652.8000 entropy=3.4541 ent_coef=0.001893 approx_kl=0.0008 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 11860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-404757.5 mean_steps=13.2
|
|
[Episode 11870] reward=-56244955.9 actor_loss=0.1235 critic_loss=128938494976.0000 entropy=3.4551 ent_coef=0.001893 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 11880] reward=-54175253.3 actor_loss=0.1207 critic_loss=124327796736.0000 entropy=3.4525 ent_coef=0.001893 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 11880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-417011.3 mean_steps=14.8
|
|
[Episode 11890] reward=-62678132.9 actor_loss=0.1226 critic_loss=128452295884.8000 entropy=3.4498 ent_coef=0.001893 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Episode 11900] reward=-53469344.8 actor_loss=0.1269 critic_loss=123017427899.7333 entropy=3.4515 ent_coef=0.001893 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 11900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440178.8 mean_steps=13.3
|
|
[Episode 11910] reward=-63601475.1 actor_loss=0.1109 critic_loss=130817310720.0000 entropy=3.4515 ent_coef=0.001893 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 11920] reward=-58652243.3 actor_loss=0.1353 critic_loss=127052593379.5556 entropy=3.4526 ent_coef=0.001893 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Eval 11920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501752.5 mean_steps=13.0
|
|
[Episode 11930] reward=-45339691.8 actor_loss=0.1099 critic_loss=116784394240.0000 entropy=3.4522 ent_coef=0.001893 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 11940] reward=-56752366.5 actor_loss=0.0991 critic_loss=126841699441.7778 entropy=3.4520 ent_coef=0.001893 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 11940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544629.2 mean_steps=12.3
|
|
[Episode 11950] reward=-57561474.4 actor_loss=0.1475 critic_loss=124292471193.6000 entropy=3.4497 ent_coef=0.001892 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Episode 11960] reward=-48905199.7 actor_loss=0.1099 critic_loss=121634409813.3333 entropy=3.4467 ent_coef=0.001892 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 11960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-329933.3 mean_steps=14.2
|
|
[Episode 11970] reward=-71337102.8 actor_loss=0.1388 critic_loss=137182386734.5455 entropy=3.4492 ent_coef=0.001892 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0957 front_blocked=0
|
|
[Episode 11980] reward=-59103047.0 actor_loss=0.1335 critic_loss=128240995532.8000 entropy=3.4516 ent_coef=0.001892 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Eval 11980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-380867.2 mean_steps=14.3
|
|
[Episode 11990] reward=-54424341.2 actor_loss=0.1298 critic_loss=124417988608.0000 entropy=3.4530 ent_coef=0.001892 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 12000] reward=-54602086.1 actor_loss=0.0906 critic_loss=122588253184.0000 entropy=3.4535 ent_coef=0.001892 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 12000] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-331637.2 mean_steps=14.7
|
|
[Episode 12010] reward=-51681769.9 actor_loss=0.1496 critic_loss=121901686784.0000 entropy=3.4532 ent_coef=0.001892 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 12020] reward=-50601622.8 actor_loss=0.1061 critic_loss=121317265993.1429 entropy=3.4533 ent_coef=0.001892 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 12020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-616431.7 mean_steps=12.0
|
|
[Episode 12030] reward=-58765838.5 actor_loss=0.1500 critic_loss=126516576256.0000 entropy=3.4518 ent_coef=0.001892 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Episode 12040] reward=-59512137.6 actor_loss=0.1011 critic_loss=123647333717.3333 entropy=3.4501 ent_coef=0.001892 approx_kl=0.0006 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 12040] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-597838.8 mean_steps=11.2
|
|
[Episode 12050] reward=-56675140.3 actor_loss=0.1329 critic_loss=123732797030.4000 entropy=3.4515 ent_coef=0.001892 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 12060] reward=-51462080.4 actor_loss=0.0994 critic_loss=120367551488.0000 entropy=3.4505 ent_coef=0.001891 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 12060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-567871.9 mean_steps=11.6
|
|
[Episode 12070] reward=-57335197.1 actor_loss=0.1053 critic_loss=122445555302.4000 entropy=3.4504 ent_coef=0.001891 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 12080] reward=-56324518.1 actor_loss=0.1056 critic_loss=126838810916.5714 entropy=3.4481 ent_coef=0.001891 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 12080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-396674.8 mean_steps=13.9
|
|
[Episode 12090] reward=-45414510.1 actor_loss=0.1229 critic_loss=120749321654.8571 entropy=3.4470 ent_coef=0.001891 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 12100] reward=-59903937.6 actor_loss=0.1330 critic_loss=129709907968.0000 entropy=3.4481 ent_coef=0.001891 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 12100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-496460.1 mean_steps=12.2
|
|
[Episode 12110] reward=-50435667.3 actor_loss=0.1555 critic_loss=120354906112.0000 entropy=3.4498 ent_coef=0.001891 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 12120] reward=-54933407.1 actor_loss=0.0800 critic_loss=118851889152.0000 entropy=3.4485 ent_coef=0.001891 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 12120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455502.7 mean_steps=13.8
|
|
[Episode 12130] reward=-51749640.5 actor_loss=0.1237 critic_loss=125031557802.6667 entropy=3.4471 ent_coef=0.001891 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 12140] reward=-54891683.1 actor_loss=0.1103 critic_loss=120929221485.7143 entropy=3.4471 ent_coef=0.001891 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 12140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566053.4 mean_steps=11.9
|
|
[Episode 12150] reward=-51665713.9 actor_loss=0.1296 critic_loss=121063147520.0000 entropy=3.4463 ent_coef=0.001891 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 12160] reward=-52248945.5 actor_loss=0.0941 critic_loss=121943927714.9091 entropy=3.4472 ent_coef=0.001891 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 12160] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-367047.4 mean_steps=14.7
|
|
[Episode 12170] reward=-53913253.2 actor_loss=0.1177 critic_loss=122098421760.0000 entropy=3.4491 ent_coef=0.001890 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 12180] reward=-56280617.3 actor_loss=0.1206 critic_loss=127826505728.0000 entropy=3.4501 ent_coef=0.001890 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 12180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-549353.6 mean_steps=11.9
|
|
[Episode 12190] reward=-54116965.1 actor_loss=0.1181 critic_loss=124351460966.4000 entropy=3.4516 ent_coef=0.001890 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 12200] reward=-48017261.2 actor_loss=0.1470 critic_loss=118937377905.7778 entropy=3.4495 ent_coef=0.001890 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 12200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470856.9 mean_steps=13.0
|
|
[Episode 12210] reward=-50173338.0 actor_loss=0.1134 critic_loss=117441834422.8571 entropy=3.4475 ent_coef=0.001890 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 12220] reward=-60316125.6 actor_loss=0.1212 critic_loss=130013151232.0000 entropy=3.4483 ent_coef=0.001890 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 12220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-516490.0 mean_steps=13.2
|
|
[Episode 12230] reward=-51279923.4 actor_loss=0.1189 critic_loss=121158474752.0000 entropy=3.4460 ent_coef=0.001890 approx_kl=0.0011 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 12240] reward=-45803579.9 actor_loss=0.0922 critic_loss=121036145664.0000 entropy=3.4468 ent_coef=0.001890 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 12240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-588833.5 mean_steps=12.3
|
|
[Episode 12250] reward=-49987916.3 actor_loss=0.1295 critic_loss=122649020416.0000 entropy=3.4471 ent_coef=0.001890 approx_kl=-0.0003 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 12260] reward=-56130386.4 actor_loss=0.1158 critic_loss=126466074624.0000 entropy=3.4493 ent_coef=0.001890 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 12260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-432911.6 mean_steps=12.7
|
|
[Episode 12270] reward=-51595017.0 actor_loss=0.0987 critic_loss=122979463168.0000 entropy=3.4515 ent_coef=0.001890 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 12280] reward=-59084579.8 actor_loss=0.0954 critic_loss=129145886947.5556 entropy=3.4507 ent_coef=0.001889 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 12280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497495.5 mean_steps=13.1
|
|
[Episode 12290] reward=-43529426.5 actor_loss=0.1161 critic_loss=120126702387.2000 entropy=3.4533 ent_coef=0.001889 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 12300] reward=-54042339.8 actor_loss=0.1211 critic_loss=126121921442.9091 entropy=3.4547 ent_coef=0.001889 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 12300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-413045.5 mean_steps=12.6
|
|
[Episode 12310] reward=-37439427.6 actor_loss=0.1080 critic_loss=115279789169.7778 entropy=3.4580 ent_coef=0.001889 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 12320] reward=-34434119.4 actor_loss=0.1002 critic_loss=114702191001.6000 entropy=3.4585 ent_coef=0.001889 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 12320] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-381215.1 mean_steps=13.9
|
|
[Episode 12330] reward=-47535449.2 actor_loss=0.1262 critic_loss=121686851584.0000 entropy=3.4597 ent_coef=0.001889 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 12340] reward=-58302369.7 actor_loss=0.1290 critic_loss=124835192012.8000 entropy=3.4602 ent_coef=0.001889 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 12340] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-448125.6 mean_steps=15.4
|
|
[Episode 12350] reward=-53359635.6 actor_loss=0.0936 critic_loss=119716498090.6667 entropy=3.4561 ent_coef=0.001889 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 12360] reward=-56467664.9 actor_loss=0.1109 critic_loss=126301826126.7692 entropy=3.4595 ent_coef=0.001889 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 12360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-365455.7 mean_steps=14.8
|
|
[Episode 12370] reward=-51135672.9 actor_loss=0.0786 critic_loss=123388485632.0000 entropy=3.4576 ent_coef=0.001889 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 12380] reward=-53715848.2 actor_loss=0.1126 critic_loss=123143651328.0000 entropy=3.4590 ent_coef=0.001889 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 12380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-463193.2 mean_steps=14.4
|
|
[Episode 12390] reward=-66615251.7 actor_loss=0.1318 critic_loss=131790895662.5455 entropy=3.4578 ent_coef=0.001888 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 12400] reward=-52556992.6 actor_loss=0.1110 critic_loss=126437467136.0000 entropy=3.4580 ent_coef=0.001888 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 12400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481238.0 mean_steps=12.8
|
|
[Episode 12410] reward=-53847424.6 actor_loss=0.1154 critic_loss=125874632704.0000 entropy=3.4620 ent_coef=0.001888 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 12420] reward=-43492086.3 actor_loss=0.1041 critic_loss=119416448146.2857 entropy=3.4605 ent_coef=0.001888 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 12420] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-637092.0 mean_steps=11.4
|
|
[Episode 12430] reward=-57335589.2 actor_loss=0.1374 critic_loss=127028467029.3333 entropy=3.4594 ent_coef=0.001888 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 12440] reward=-60225118.1 actor_loss=0.1277 critic_loss=128136402013.0909 entropy=3.4571 ent_coef=0.001888 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 12440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-467804.3 mean_steps=11.9
|
|
[Episode 12450] reward=-49376996.4 actor_loss=0.0981 critic_loss=123138013184.0000 entropy=3.4562 ent_coef=0.001888 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 12460] reward=-53647627.5 actor_loss=0.1165 critic_loss=120564786790.4000 entropy=3.4552 ent_coef=0.001888 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 12460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-393703.3 mean_steps=14.8
|
|
[Episode 12470] reward=-55188255.0 actor_loss=0.1367 critic_loss=127498121588.3636 entropy=3.4542 ent_coef=0.001888 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 12480] reward=-53475068.6 actor_loss=0.1220 critic_loss=123833284765.5385 entropy=3.4549 ent_coef=0.001888 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 12480] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-623264.3 mean_steps=11.4
|
|
[Episode 12490] reward=-53340761.9 actor_loss=0.1324 critic_loss=123569198694.4000 entropy=3.4530 ent_coef=0.001888 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 12500] reward=-46176429.0 actor_loss=0.1333 critic_loss=121368442880.0000 entropy=3.4541 ent_coef=0.001888 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 12500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-500920.3 mean_steps=13.9
|
|
[Episode 12510] reward=-49801657.9 actor_loss=0.1001 critic_loss=122546331648.0000 entropy=3.4545 ent_coef=0.001887 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 12520] reward=-45979564.8 actor_loss=0.1070 critic_loss=122250037930.6667 entropy=3.4573 ent_coef=0.001887 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 12520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477244.9 mean_steps=13.8
|
|
[Episode 12530] reward=-53807375.8 actor_loss=0.1032 critic_loss=128185527864.8889 entropy=3.4581 ent_coef=0.001887 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 12540] reward=-60005408.3 actor_loss=0.1510 critic_loss=128109118350.2222 entropy=3.4552 ent_coef=0.001887 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 12540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492611.9 mean_steps=13.1
|
|
[Episode 12550] reward=-43027845.1 actor_loss=0.1194 critic_loss=119470769298.2857 entropy=3.4577 ent_coef=0.001887 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 12560] reward=-54583027.8 actor_loss=0.1340 critic_loss=128976695668.3636 entropy=3.4562 ent_coef=0.001887 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 12560] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-595033.3 mean_steps=11.4
|
|
[Episode 12570] reward=-50802786.3 actor_loss=0.1358 critic_loss=124357686613.3333 entropy=3.4600 ent_coef=0.001887 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 12580] reward=-55928301.7 actor_loss=0.1037 critic_loss=125195705457.7778 entropy=3.4592 ent_coef=0.001887 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 12580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-525089.2 mean_steps=12.6
|
|
[Episode 12590] reward=-48839893.5 actor_loss=0.1250 critic_loss=124737255833.6000 entropy=3.4588 ent_coef=0.001887 approx_kl=0.0003 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 12600] reward=-55556265.3 actor_loss=0.1013 critic_loss=124248673186.9091 entropy=3.4597 ent_coef=0.001887 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 12600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-489704.8 mean_steps=12.3
|
|
[Episode 12610] reward=-48485172.2 actor_loss=0.1271 critic_loss=120717388068.5714 entropy=3.4563 ent_coef=0.001887 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 12620] reward=-64804165.9 actor_loss=0.1189 critic_loss=128307281920.0000 entropy=3.4558 ent_coef=0.001886 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 12620] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-212594.9 mean_steps=15.8
|
|
[Episode 12630] reward=-48171498.5 actor_loss=0.1171 critic_loss=121738960265.8462 entropy=3.4518 ent_coef=0.001886 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 12640] reward=-51940635.8 actor_loss=0.1222 critic_loss=125886504960.0000 entropy=3.4527 ent_coef=0.001886 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 12640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-405310.0 mean_steps=13.4
|
|
[Episode 12650] reward=-45854384.2 actor_loss=0.1500 critic_loss=117877015552.0000 entropy=3.4500 ent_coef=0.001886 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 12660] reward=-55836685.5 actor_loss=0.1543 critic_loss=122930972444.4444 entropy=3.4517 ent_coef=0.001886 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 12660] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-587102.8 mean_steps=12.1
|
|
[Episode 12670] reward=-50409379.0 actor_loss=0.0836 critic_loss=123397840896.0000 entropy=3.4517 ent_coef=0.001886 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 12680] reward=-46020699.0 actor_loss=0.1115 critic_loss=120192732598.8571 entropy=3.4532 ent_coef=0.001886 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 12680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-401214.6 mean_steps=14.7
|
|
[Episode 12690] reward=-65090514.2 actor_loss=0.1080 critic_loss=130431628288.0000 entropy=3.4564 ent_coef=0.001886 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Episode 12700] reward=-51424957.3 actor_loss=0.1361 critic_loss=122872338432.0000 entropy=3.4550 ent_coef=0.001886 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 12700] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-428581.0 mean_steps=12.7
|
|
[Episode 12710] reward=-56411704.8 actor_loss=0.1322 critic_loss=127804372309.3333 entropy=3.4581 ent_coef=0.001886 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 12720] reward=-55672972.5 actor_loss=0.0862 critic_loss=128548231168.0000 entropy=3.4595 ent_coef=0.001886 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 12720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434657.8 mean_steps=14.0
|
|
[Episode 12730] reward=-39074229.0 actor_loss=0.0941 critic_loss=114106271061.3333 entropy=3.4596 ent_coef=0.001885 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Episode 12740] reward=-60172328.0 actor_loss=0.0998 critic_loss=131836797838.2222 entropy=3.4567 ent_coef=0.001885 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 12740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-334670.7 mean_steps=14.6
|
|
[Episode 12750] reward=-48767458.7 actor_loss=0.1138 critic_loss=119955902464.0000 entropy=3.4585 ent_coef=0.001885 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 12760] reward=-67417164.6 actor_loss=0.1319 critic_loss=134342529609.1429 entropy=3.4588 ent_coef=0.001885 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 12760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-425370.9 mean_steps=14.3
|
|
[Episode 12770] reward=-55763887.3 actor_loss=0.1268 critic_loss=126937536325.8182 entropy=3.4583 ent_coef=0.001885 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 12780] reward=-49049599.3 actor_loss=0.1318 critic_loss=119329705984.0000 entropy=3.4586 ent_coef=0.001885 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 12780] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-614751.7 mean_steps=11.3
|
|
[Episode 12790] reward=-49477210.8 actor_loss=0.1219 critic_loss=120121815859.2000 entropy=3.4557 ent_coef=0.001885 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 12800] reward=-51544533.5 actor_loss=0.0915 critic_loss=122418877440.0000 entropy=3.4545 ent_coef=0.001885 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 12800] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-571476.8 mean_steps=12.1
|
|
[Episode 12810] reward=-52554303.5 actor_loss=0.1106 critic_loss=119585644544.0000 entropy=3.4575 ent_coef=0.001885 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 12820] reward=-64540527.4 actor_loss=0.1206 critic_loss=130686801920.0000 entropy=3.4560 ent_coef=0.001885 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 12820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-501087.1 mean_steps=14.0
|
|
[Episode 12830] reward=-51885421.0 actor_loss=0.1219 critic_loss=124947625053.0909 entropy=3.4564 ent_coef=0.001885 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 12840] reward=-42870527.3 actor_loss=0.1287 critic_loss=121855355904.0000 entropy=3.4528 ent_coef=0.001884 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 12840] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560080.1 mean_steps=12.8
|
|
[Episode 12850] reward=-49536462.6 actor_loss=0.1082 critic_loss=123118048987.4286 entropy=3.4510 ent_coef=0.001884 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 12860] reward=-46387417.6 actor_loss=0.1142 critic_loss=120048230400.0000 entropy=3.4504 ent_coef=0.001884 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 12860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-426623.2 mean_steps=13.3
|
|
[Episode 12870] reward=-50543815.6 actor_loss=0.1085 critic_loss=122229983004.4444 entropy=3.4523 ent_coef=0.001884 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 12880] reward=-51966906.8 actor_loss=0.1234 critic_loss=120886502195.2000 entropy=3.4515 ent_coef=0.001884 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 12880] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-293607.1 mean_steps=15.6
|
|
[Episode 12890] reward=-44498408.9 actor_loss=0.0978 critic_loss=119186953557.3333 entropy=3.4507 ent_coef=0.001884 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 12900] reward=-52741456.7 actor_loss=0.1187 critic_loss=125035872256.0000 entropy=3.4509 ent_coef=0.001884 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 12900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-571809.3 mean_steps=12.8
|
|
[Episode 12910] reward=-57917151.0 actor_loss=0.1161 critic_loss=127305011785.1429 entropy=3.4517 ent_coef=0.001884 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 12920] reward=-50734922.5 actor_loss=0.1287 critic_loss=125188576987.4286 entropy=3.4508 ent_coef=0.001884 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 12920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-531925.2 mean_steps=11.7
|
|
[Episode 12930] reward=-45885820.4 actor_loss=0.1060 critic_loss=118350586880.0000 entropy=3.4493 ent_coef=0.001884 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 12940] reward=-53587203.2 actor_loss=0.1257 critic_loss=123307868160.0000 entropy=3.4483 ent_coef=0.001884 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 12940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-384360.6 mean_steps=14.0
|
|
[Episode 12950] reward=-55792466.2 actor_loss=0.1138 critic_loss=128026168661.3333 entropy=3.4458 ent_coef=0.001883 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 12960] reward=-57611633.5 actor_loss=0.1500 critic_loss=123009967900.4444 entropy=3.4444 ent_coef=0.001883 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 12960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-387792.9 mean_steps=13.8
|
|
[Episode 12970] reward=-53845998.8 actor_loss=0.1276 critic_loss=122419274547.2000 entropy=3.4409 ent_coef=0.001883 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 12980] reward=-52681905.3 actor_loss=0.1272 critic_loss=126332957044.3636 entropy=3.4407 ent_coef=0.001883 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 12980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557410.9 mean_steps=12.7
|
|
[Episode 12990] reward=-55559629.9 actor_loss=0.1272 critic_loss=129920352256.0000 entropy=3.4367 ent_coef=0.001883 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 13000] reward=-59769909.5 actor_loss=0.1053 critic_loss=128724594688.0000 entropy=3.4355 ent_coef=0.001883 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 13000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-502221.6 mean_steps=12.1
|
|
[Episode 13010] reward=-58744749.6 actor_loss=0.1050 critic_loss=126172418420.3636 entropy=3.4383 ent_coef=0.001883 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Episode 13020] reward=-57988634.0 actor_loss=0.1057 critic_loss=128724828160.0000 entropy=3.4392 ent_coef=0.001883 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Eval 13020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458219.2 mean_steps=13.4
|
|
[Episode 13030] reward=-59353239.9 actor_loss=0.1330 critic_loss=129202713600.0000 entropy=3.4377 ent_coef=0.001883 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Episode 13040] reward=-49219712.3 actor_loss=0.1165 critic_loss=117461298176.0000 entropy=3.4361 ent_coef=0.001883 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 13040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-476104.9 mean_steps=12.9
|
|
[Episode 13050] reward=-50332991.1 actor_loss=0.1195 critic_loss=120248676937.1429 entropy=3.4386 ent_coef=0.001883 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 13060] reward=-43648352.1 actor_loss=0.1113 critic_loss=116267881813.3333 entropy=3.4380 ent_coef=0.001882 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 13060] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-282780.9 mean_steps=15.4
|
|
[Episode 13070] reward=-51591491.3 actor_loss=0.1417 critic_loss=128798565990.4000 entropy=3.4379 ent_coef=0.001882 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 13080] reward=-54586634.1 actor_loss=0.1407 critic_loss=122727166537.1429 entropy=3.4366 ent_coef=0.001882 approx_kl=-0.0001 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 13080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-589478.6 mean_steps=12.7
|
|
[Episode 13090] reward=-55989723.1 actor_loss=0.1479 critic_loss=122813426594.9091 entropy=3.4335 ent_coef=0.001882 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 13100] reward=-60516839.5 actor_loss=0.0863 critic_loss=127782766182.4000 entropy=3.4319 ent_coef=0.001882 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 13100] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-691359.9 mean_steps=10.7
|
|
[Episode 13110] reward=-54094902.8 actor_loss=0.1369 critic_loss=124048747724.8000 entropy=3.4339 ent_coef=0.001882 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 13120] reward=-60426952.2 actor_loss=0.1064 critic_loss=127732298183.1111 entropy=3.4345 ent_coef=0.001882 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 13120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-557958.4 mean_steps=12.9
|
|
[Episode 13130] reward=-66816831.8 actor_loss=0.1101 critic_loss=137109970944.0000 entropy=3.4306 ent_coef=0.001882 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 13140] reward=-54360824.7 actor_loss=0.1137 critic_loss=123412570112.0000 entropy=3.4287 ent_coef=0.001882 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 13140] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-601526.8 mean_steps=12.2
|
|
[Episode 13150] reward=-48354948.9 actor_loss=0.1150 critic_loss=122585276416.0000 entropy=3.4292 ent_coef=0.001882 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 13160] reward=-52285764.5 actor_loss=0.1083 critic_loss=124146199756.8000 entropy=3.4266 ent_coef=0.001882 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 13160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-441803.8 mean_steps=14.5
|
|
[Episode 13170] reward=-58878964.4 actor_loss=0.1229 critic_loss=130139449344.0000 entropy=3.4266 ent_coef=0.001881 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 13180] reward=-58551751.6 actor_loss=0.0875 critic_loss=127448323413.3333 entropy=3.4286 ent_coef=0.001881 approx_kl=0.0000 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 13180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-379655.1 mean_steps=14.3
|
|
[Episode 13190] reward=-57075292.8 actor_loss=0.1113 critic_loss=128796143243.6364 entropy=3.4273 ent_coef=0.001881 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 13200] reward=-57192707.0 actor_loss=0.1154 critic_loss=123880000512.0000 entropy=3.4256 ent_coef=0.001881 approx_kl=0.0008 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 13200] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-339687.3 mean_steps=16.1
|
|
[Episode 13210] reward=-56689027.0 actor_loss=0.1391 critic_loss=128660318500.5714 entropy=3.4235 ent_coef=0.001881 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 13220] reward=-42145076.6 actor_loss=0.1472 critic_loss=115362308096.0000 entropy=3.4219 ent_coef=0.001881 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 13220] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-411207.0 mean_steps=13.8
|
|
[Episode 13230] reward=-58008905.7 actor_loss=0.1093 critic_loss=127756510822.4000 entropy=3.4245 ent_coef=0.001881 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 13240] reward=-57177124.8 actor_loss=0.1162 critic_loss=129491110180.5714 entropy=3.4219 ent_coef=0.001881 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 13240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-407490.4 mean_steps=14.6
|
|
[Episode 13250] reward=-56074060.6 actor_loss=0.1022 critic_loss=127204124048.6956 entropy=3.4242 ent_coef=0.001881 approx_kl=0.0011 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 13260] reward=-61935101.5 actor_loss=0.1438 critic_loss=131074193221.8182 entropy=3.4245 ent_coef=0.001881 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 13260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468279.8 mean_steps=12.6
|
|
[Episode 13270] reward=-59626392.6 actor_loss=0.1374 critic_loss=129470978275.5556 entropy=3.4202 ent_coef=0.001881 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 13280] reward=-44993264.9 actor_loss=0.1117 critic_loss=119704221403.4286 entropy=3.4240 ent_coef=0.001880 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 13280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-524437.4 mean_steps=13.0
|
|
[Episode 13290] reward=-48570880.9 actor_loss=0.1111 critic_loss=123495622656.0000 entropy=3.4245 ent_coef=0.001880 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 13300] reward=-55278966.5 actor_loss=0.1177 critic_loss=127128760320.0000 entropy=3.4230 ent_coef=0.001880 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 13300] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-557257.7 mean_steps=11.2
|
|
[Episode 13310] reward=-50581435.4 actor_loss=0.0854 critic_loss=123002697318.4000 entropy=3.4228 ent_coef=0.001880 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 13320] reward=-57534578.9 actor_loss=0.1203 critic_loss=127367528448.0000 entropy=3.4187 ent_coef=0.001880 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 13320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-527097.4 mean_steps=11.7
|
|
[Episode 13330] reward=-54700844.5 actor_loss=0.1445 critic_loss=123975706624.0000 entropy=3.4179 ent_coef=0.001880 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 13340] reward=-57545189.0 actor_loss=0.1284 critic_loss=127900180480.0000 entropy=3.4201 ent_coef=0.001880 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 13340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410650.9 mean_steps=14.0
|
|
[Episode 13350] reward=-49748124.0 actor_loss=0.1009 critic_loss=123399685585.4545 entropy=3.4164 ent_coef=0.001880 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 13360] reward=-49497632.5 actor_loss=0.1141 critic_loss=123470116181.3333 entropy=3.4184 ent_coef=0.001880 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 13360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-428140.7 mean_steps=13.4
|
|
[Episode 13370] reward=-65319297.3 actor_loss=0.1398 critic_loss=132575662080.0000 entropy=3.4143 ent_coef=0.001880 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Episode 13380] reward=-51284400.6 actor_loss=0.1403 critic_loss=121362024220.4444 entropy=3.4146 ent_coef=0.001880 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 13380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-574131.1 mean_steps=11.7
|
|
[Episode 13390] reward=-45140216.4 actor_loss=0.0893 critic_loss=121550684790.1538 entropy=3.4154 ent_coef=0.001879 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 13400] reward=-55512530.1 actor_loss=0.1107 critic_loss=124192343381.3333 entropy=3.4100 ent_coef=0.001879 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 13400] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-342649.4 mean_steps=14.9
|
|
[Episode 13410] reward=-68554495.4 actor_loss=0.1201 critic_loss=135629127680.0000 entropy=3.4093 ent_coef=0.001879 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Episode 13420] reward=-55555294.2 actor_loss=0.1601 critic_loss=124354441588.3636 entropy=3.4124 ent_coef=0.001879 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 13420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-372612.9 mean_steps=14.4
|
|
[Episode 13430] reward=-42226018.4 actor_loss=0.1000 critic_loss=121358591590.4000 entropy=3.4077 ent_coef=0.001879 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Episode 13440] reward=-62723231.9 actor_loss=0.0798 critic_loss=132507077836.8000 entropy=3.4064 ent_coef=0.001879 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0957 front_blocked=0
|
|
[Eval 13440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537798.6 mean_steps=12.4
|
|
[Episode 13450] reward=-49761622.3 actor_loss=0.0960 critic_loss=123007171515.7333 entropy=3.4088 ent_coef=0.001879 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 13460] reward=-50908822.8 actor_loss=0.1130 critic_loss=123059175424.0000 entropy=3.4093 ent_coef=0.001879 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 13460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480523.1 mean_steps=12.9
|
|
[Episode 13470] reward=-53771559.4 actor_loss=0.1820 critic_loss=120562666496.0000 entropy=3.4109 ent_coef=0.001879 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 13480] reward=-60696764.9 actor_loss=0.1232 critic_loss=127057459063.4667 entropy=3.4094 ent_coef=0.001879 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Eval 13480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422794.2 mean_steps=13.8
|
|
[Episode 13490] reward=-57187432.2 actor_loss=0.1278 critic_loss=123038596209.7778 entropy=3.4102 ent_coef=0.001879 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 13500] reward=-47923164.4 actor_loss=0.0994 critic_loss=121383873974.8571 entropy=3.4089 ent_coef=0.001879 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 13500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-502193.9 mean_steps=12.1
|
|
[Episode 13510] reward=-54627469.2 actor_loss=0.1215 critic_loss=129156217514.6667 entropy=3.4092 ent_coef=0.001878 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 13520] reward=-56983122.5 actor_loss=0.1102 critic_loss=125769654954.6667 entropy=3.4097 ent_coef=0.001878 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 13520] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-597680.9 mean_steps=11.3
|
|
[Episode 13530] reward=-60124391.5 actor_loss=0.0989 critic_loss=129574340198.4000 entropy=3.4083 ent_coef=0.001878 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 13540] reward=-49763671.5 actor_loss=0.1246 critic_loss=120985738353.7778 entropy=3.4031 ent_coef=0.001878 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 13540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-371196.1 mean_steps=13.7
|
|
[Episode 13550] reward=-63513309.6 actor_loss=0.1536 critic_loss=130146125414.4000 entropy=3.3994 ent_coef=0.001878 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Episode 13560] reward=-62790474.1 actor_loss=0.1140 critic_loss=132760698880.0000 entropy=3.3975 ent_coef=0.001878 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 13560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460086.5 mean_steps=13.4
|
|
[Episode 13570] reward=-43246214.0 actor_loss=0.1093 critic_loss=116785237430.8571 entropy=3.3971 ent_coef=0.001878 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 13580] reward=-54280370.5 actor_loss=0.1556 critic_loss=122715796480.0000 entropy=3.3975 ent_coef=0.001878 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 13580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-589405.1 mean_steps=11.3
|
|
[Episode 13590] reward=-56032981.8 actor_loss=0.0965 critic_loss=125811687424.0000 entropy=3.3964 ent_coef=0.001878 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 13600] reward=-46322015.0 actor_loss=0.1098 critic_loss=119247169536.0000 entropy=3.3925 ent_coef=0.001878 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 13600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-393388.0 mean_steps=14.3
|
|
[Episode 13610] reward=-54964647.6 actor_loss=0.1094 critic_loss=127840023893.3333 entropy=3.3929 ent_coef=0.001878 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 13620] reward=-40394515.9 actor_loss=0.1223 critic_loss=115969685913.6000 entropy=3.3899 ent_coef=0.001877 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 13620] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-550862.5 mean_steps=11.6
|
|
[Episode 13630] reward=-52775893.7 actor_loss=0.1234 critic_loss=121798875136.0000 entropy=3.3889 ent_coef=0.001877 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 13640] reward=-59254606.6 actor_loss=0.1121 critic_loss=122101936713.1429 entropy=3.3905 ent_coef=0.001877 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 13640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498639.3 mean_steps=12.9
|
|
[Episode 13650] reward=-57708087.3 actor_loss=0.1113 critic_loss=127120545319.3846 entropy=3.3890 ent_coef=0.001877 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 13660] reward=-54860831.5 actor_loss=0.1424 critic_loss=122217734144.0000 entropy=3.3911 ent_coef=0.001877 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 13660] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-337000.2 mean_steps=14.9
|
|
[Episode 13670] reward=-57753127.3 actor_loss=0.1043 critic_loss=122803889766.4000 entropy=3.3899 ent_coef=0.001877 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 13680] reward=-59164496.3 actor_loss=0.1505 critic_loss=128001307443.2000 entropy=3.3888 ent_coef=0.001877 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Eval 13680] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399319.3 mean_steps=13.8
|
|
[Episode 13690] reward=-53104920.5 actor_loss=0.1185 critic_loss=123581801813.3333 entropy=3.3909 ent_coef=0.001877 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 13700] reward=-47313882.1 actor_loss=0.1243 critic_loss=121231077961.1429 entropy=3.3937 ent_coef=0.001877 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 13700] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-341769.1 mean_steps=14.8
|
|
[Episode 13710] reward=-52799810.6 actor_loss=0.1161 critic_loss=123262509875.2000 entropy=3.3948 ent_coef=0.001877 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 13720] reward=-54255031.3 actor_loss=0.1260 critic_loss=121735756800.0000 entropy=3.3942 ent_coef=0.001877 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 13720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402065.0 mean_steps=13.8
|
|
[Episode 13730] reward=-54579421.1 actor_loss=0.1283 critic_loss=122463412224.0000 entropy=3.3926 ent_coef=0.001876 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 13740] reward=-55567110.9 actor_loss=0.0904 critic_loss=125388827648.0000 entropy=3.3886 ent_coef=0.001876 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 13740] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552884.1 mean_steps=12.6
|
|
[Episode 13750] reward=-52571770.5 actor_loss=0.1005 critic_loss=127012165632.0000 entropy=3.3873 ent_coef=0.001876 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 13760] reward=-62849049.7 actor_loss=0.1574 critic_loss=127664311202.9091 entropy=3.3845 ent_coef=0.001876 approx_kl=0.0007 kl_stop=1 intervention_rate=0.0996 front_blocked=0
|
|
[Eval 13760] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-325758.6 mean_steps=14.7
|
|
[Episode 13770] reward=-55029430.0 actor_loss=0.1502 critic_loss=122667546331.4286 entropy=3.3866 ent_coef=0.001876 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 13780] reward=-44845412.8 actor_loss=0.1177 critic_loss=120952633571.5556 entropy=3.3849 ent_coef=0.001876 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 13780] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-306334.6 mean_steps=15.6
|
|
[Episode 13790] reward=-53116693.0 actor_loss=0.1042 critic_loss=124288692224.0000 entropy=3.3847 ent_coef=0.001876 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 13800] reward=-56090224.2 actor_loss=0.1008 critic_loss=122113564672.0000 entropy=3.3815 ent_coef=0.001876 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 13800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437413.0 mean_steps=13.2
|
|
[Episode 13810] reward=-61677674.2 actor_loss=0.1403 critic_loss=128173118259.2000 entropy=3.3831 ent_coef=0.001876 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 13820] reward=-58246042.6 actor_loss=0.1030 critic_loss=124789202199.2727 entropy=3.3825 ent_coef=0.001876 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 13820] success_rate=0.750 qp_infeasible_rate=0.250 mean_return=-135749.5 mean_steps=16.6
|
|
[Episode 13830] reward=-66421153.8 actor_loss=0.1365 critic_loss=127009937115.4286 entropy=3.3823 ent_coef=0.001876 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Episode 13840] reward=-61224326.7 actor_loss=0.1477 critic_loss=128101887180.8000 entropy=3.3789 ent_coef=0.001875 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 13840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-467426.6 mean_steps=13.6
|
|
[Episode 13850] reward=-65014744.3 actor_loss=0.1443 critic_loss=132135752052.3636 entropy=3.3776 ent_coef=0.001875 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0977 front_blocked=0
|
|
[Episode 13860] reward=-43430393.5 actor_loss=0.1129 critic_loss=118062305865.1429 entropy=3.3746 ent_coef=0.001875 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 13860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-457490.3 mean_steps=13.2
|
|
[Episode 13870] reward=-50221795.1 actor_loss=0.1238 critic_loss=122938533205.3333 entropy=3.3740 ent_coef=0.001875 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 13880] reward=-57287397.8 actor_loss=0.1296 critic_loss=128150457548.8000 entropy=3.3740 ent_coef=0.001875 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 13880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468421.5 mean_steps=13.4
|
|
[Episode 13890] reward=-53006309.3 actor_loss=0.1465 critic_loss=122893576874.6667 entropy=3.3735 ent_coef=0.001875 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 13900] reward=-54547663.4 actor_loss=0.1239 critic_loss=126423644842.6667 entropy=3.3745 ent_coef=0.001875 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 13900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-519718.0 mean_steps=12.1
|
|
[Episode 13910] reward=-55729225.0 actor_loss=0.1330 critic_loss=126456306073.6000 entropy=3.3722 ent_coef=0.001875 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 13920] reward=-47020976.8 actor_loss=0.1122 critic_loss=119429695624.5333 entropy=3.3704 ent_coef=0.001875 approx_kl=0.0011 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 13920] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-333727.2 mean_steps=13.9
|
|
[Episode 13930] reward=-54363777.7 actor_loss=0.1148 critic_loss=121401889353.1429 entropy=3.3676 ent_coef=0.001875 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 13940] reward=-62164063.8 actor_loss=0.1103 critic_loss=129340322377.1429 entropy=3.3653 ent_coef=0.001875 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 13940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-491669.3 mean_steps=12.1
|
|
[Episode 13950] reward=-56674487.1 actor_loss=0.1230 critic_loss=129295458304.0000 entropy=3.3662 ent_coef=0.001874 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 13960] reward=-66297711.6 actor_loss=0.1485 critic_loss=134122456678.4000 entropy=3.3658 ent_coef=0.001874 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 13960] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-688322.3 mean_steps=11.7
|
|
[Episode 13970] reward=-43121277.4 actor_loss=0.1092 critic_loss=120498885973.3333 entropy=3.3666 ent_coef=0.001874 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 13980] reward=-55712761.7 actor_loss=0.1336 critic_loss=128002061312.0000 entropy=3.3655 ent_coef=0.001874 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 13980] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-396836.6 mean_steps=13.8
|
|
[Episode 13990] reward=-52298821.2 actor_loss=0.1236 critic_loss=124089688064.0000 entropy=3.3644 ent_coef=0.001874 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 14000] reward=-40432920.4 actor_loss=0.1248 critic_loss=121126045882.1818 entropy=3.3645 ent_coef=0.001874 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 14000] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-617627.5 mean_steps=11.4
|
|
[Episode 14010] reward=-53035659.1 actor_loss=0.1053 critic_loss=125560413297.7778 entropy=3.3641 ent_coef=0.001874 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 14020] reward=-53588286.2 actor_loss=0.1307 critic_loss=125005392896.0000 entropy=3.3620 ent_coef=0.001874 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 14020] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430079.4 mean_steps=13.3
|
|
[Episode 14030] reward=-53727784.6 actor_loss=0.1236 critic_loss=125180895976.7273 entropy=3.3631 ent_coef=0.001874 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 14040] reward=-50003004.3 actor_loss=0.1191 critic_loss=121973472256.0000 entropy=3.3657 ent_coef=0.001874 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 14040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-481808.9 mean_steps=12.8
|
|
[Episode 14050] reward=-55347354.6 actor_loss=0.1506 critic_loss=122702641152.0000 entropy=3.3684 ent_coef=0.001874 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 14060] reward=-51777413.6 actor_loss=0.1292 critic_loss=122548176337.4545 entropy=3.3675 ent_coef=0.001873 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 14060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-609838.4 mean_steps=12.2
|
|
[Episode 14070] reward=-41800184.4 actor_loss=0.1048 critic_loss=118510050157.7143 entropy=3.3662 ent_coef=0.001873 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 14080] reward=-53538675.7 actor_loss=0.1180 critic_loss=126701007872.0000 entropy=3.3695 ent_coef=0.001873 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 14080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-601724.5 mean_steps=12.3
|
|
[Episode 14090] reward=-59729917.0 actor_loss=0.1113 critic_loss=125069273380.5714 entropy=3.3711 ent_coef=0.001873 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 14100] reward=-50764623.3 actor_loss=0.1015 critic_loss=121154040263.1111 entropy=3.3706 ent_coef=0.001873 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 14100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471506.7 mean_steps=13.0
|
|
[Episode 14110] reward=-53248622.8 actor_loss=0.1115 critic_loss=125313616164.5714 entropy=3.3713 ent_coef=0.001873 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 14120] reward=-59673363.3 actor_loss=0.0920 critic_loss=128860950155.6364 entropy=3.3705 ent_coef=0.001873 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 14120] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-529408.1 mean_steps=11.9
|
|
[Episode 14130] reward=-47972384.3 actor_loss=0.1233 critic_loss=119489277952.0000 entropy=3.3669 ent_coef=0.001873 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 14140] reward=-56607140.6 actor_loss=0.1291 critic_loss=124904980480.0000 entropy=3.3645 ent_coef=0.001873 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 14140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-534348.9 mean_steps=13.2
|
|
[Episode 14150] reward=-48105896.8 actor_loss=0.1393 critic_loss=122069595750.4000 entropy=3.3659 ent_coef=0.001873 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 14160] reward=-50181042.7 actor_loss=0.0971 critic_loss=127836548300.8000 entropy=3.3664 ent_coef=0.001873 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 14160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-606738.0 mean_steps=12.6
|
|
[Episode 14170] reward=-53497414.5 actor_loss=0.1160 critic_loss=126451379404.8000 entropy=3.3663 ent_coef=0.001872 approx_kl=0.0007 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 14180] reward=-64266472.7 actor_loss=0.1393 critic_loss=128915503786.6667 entropy=3.3666 ent_coef=0.001872 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Eval 14180] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-700204.7 mean_steps=11.2
|
|
[Episode 14190] reward=-57412133.6 actor_loss=0.1206 critic_loss=126826978157.7143 entropy=3.3620 ent_coef=0.001872 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 14200] reward=-57865018.2 actor_loss=0.1206 critic_loss=124528915797.3333 entropy=3.3587 ent_coef=0.001872 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 14200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-482257.0 mean_steps=12.8
|
|
[Episode 14210] reward=-42016634.7 actor_loss=0.1134 critic_loss=116215631286.8571 entropy=3.3564 ent_coef=0.001872 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 14220] reward=-62725164.6 actor_loss=0.1568 critic_loss=129785757696.0000 entropy=3.3572 ent_coef=0.001872 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 14220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-477665.9 mean_steps=11.7
|
|
[Episode 14230] reward=-47296086.5 actor_loss=0.1255 critic_loss=123249310833.7778 entropy=3.3590 ent_coef=0.001872 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 14240] reward=-43811413.4 actor_loss=0.1213 critic_loss=118679652059.4286 entropy=3.3600 ent_coef=0.001872 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 14240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-444837.9 mean_steps=13.5
|
|
[Episode 14250] reward=-52962498.7 actor_loss=0.1507 critic_loss=125826964138.6667 entropy=3.3555 ent_coef=0.001872 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 14260] reward=-51057596.6 actor_loss=0.1237 critic_loss=123367830869.3333 entropy=3.3562 ent_coef=0.001872 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 14260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-531606.5 mean_steps=13.3
|
|
[Episode 14270] reward=-41834760.1 actor_loss=0.0923 critic_loss=117542655707.4286 entropy=3.3573 ent_coef=0.001872 approx_kl=0.0010 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 14280] reward=-46951217.3 actor_loss=0.0871 critic_loss=120935215104.0000 entropy=3.3565 ent_coef=0.001871 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 14280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480629.4 mean_steps=13.4
|
|
[Episode 14290] reward=-53519093.7 actor_loss=0.1171 critic_loss=122644225303.2727 entropy=3.3563 ent_coef=0.001871 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 14300] reward=-51507844.8 actor_loss=0.0996 critic_loss=120891560960.0000 entropy=3.3572 ent_coef=0.001871 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 14300] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-232971.7 mean_steps=16.0
|
|
[Episode 14310] reward=-50317277.7 actor_loss=0.1119 critic_loss=121320726528.0000 entropy=3.3555 ent_coef=0.001871 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 14320] reward=-55543079.9 actor_loss=0.0951 critic_loss=128337472170.6667 entropy=3.3540 ent_coef=0.001871 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 14320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-508507.0 mean_steps=13.2
|
|
[Episode 14330] reward=-51631384.9 actor_loss=0.1208 critic_loss=122922525842.2857 entropy=3.3552 ent_coef=0.001871 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 14340] reward=-46199958.4 actor_loss=0.1083 critic_loss=121511309897.1429 entropy=3.3571 ent_coef=0.001871 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 14340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-294417.1 mean_steps=15.0
|
|
[Episode 14350] reward=-61947984.9 actor_loss=0.1124 critic_loss=131333368832.0000 entropy=3.3566 ent_coef=0.001871 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 14360] reward=-49038573.6 actor_loss=0.1224 critic_loss=119794111829.3333 entropy=3.3565 ent_coef=0.001871 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 14360] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489454.2 mean_steps=12.8
|
|
[Episode 14370] reward=-50635795.2 actor_loss=0.1517 critic_loss=121452476123.4286 entropy=3.3538 ent_coef=0.001871 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 14380] reward=-51929480.1 actor_loss=0.0808 critic_loss=126258176819.2000 entropy=3.3539 ent_coef=0.001871 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 14380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-451770.7 mean_steps=12.6
|
|
[Episode 14390] reward=-44544529.8 actor_loss=0.1182 critic_loss=119916631381.3333 entropy=3.3511 ent_coef=0.001870 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 14400] reward=-59053323.5 actor_loss=0.1034 critic_loss=128968981783.2727 entropy=3.3516 ent_coef=0.001870 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 14400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-378649.5 mean_steps=14.8
|
|
[Episode 14410] reward=-54179836.4 actor_loss=0.1296 critic_loss=124185675962.1818 entropy=3.3507 ent_coef=0.001870 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 14420] reward=-49663277.2 actor_loss=0.1074 critic_loss=118070272000.0000 entropy=3.3484 ent_coef=0.001870 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 14420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-429537.5 mean_steps=14.1
|
|
[Episode 14430] reward=-43586282.2 actor_loss=0.1035 critic_loss=115937114521.6000 entropy=3.3475 ent_coef=0.001870 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 14440] reward=-57862379.3 actor_loss=0.1090 critic_loss=122883557218.4615 entropy=3.3453 ent_coef=0.001870 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Eval 14440] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-394219.7 mean_steps=14.2
|
|
[Episode 14450] reward=-47190845.8 actor_loss=0.1232 critic_loss=120115377245.0909 entropy=3.3437 ent_coef=0.001870 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 14460] reward=-51834868.6 actor_loss=0.1044 critic_loss=121337130062.7692 entropy=3.3436 ent_coef=0.001870 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 14460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-384332.8 mean_steps=14.8
|
|
[Episode 14470] reward=-57053076.7 actor_loss=0.1310 critic_loss=123887360000.0000 entropy=3.3402 ent_coef=0.001870 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 14480] reward=-54444859.8 actor_loss=0.1124 critic_loss=126863003648.0000 entropy=3.3407 ent_coef=0.001870 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 14480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-480577.1 mean_steps=12.8
|
|
[Episode 14490] reward=-46442602.2 actor_loss=0.1005 critic_loss=119684035584.0000 entropy=3.3398 ent_coef=0.001870 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 14500] reward=-45422566.3 actor_loss=0.0815 critic_loss=116273293084.4444 entropy=3.3422 ent_coef=0.001870 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Eval 14500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-477449.2 mean_steps=13.4
|
|
[Episode 14510] reward=-47182643.8 actor_loss=0.1368 critic_loss=120392936106.6667 entropy=3.3444 ent_coef=0.001869 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 14520] reward=-51922500.7 actor_loss=0.1221 critic_loss=125667375981.7143 entropy=3.3457 ent_coef=0.001869 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 14520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-490062.6 mean_steps=12.2
|
|
[Episode 14530] reward=-57937805.3 actor_loss=0.1356 critic_loss=124763036330.6667 entropy=3.3458 ent_coef=0.001869 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 14540] reward=-43926203.7 actor_loss=0.1078 critic_loss=118514049024.0000 entropy=3.3438 ent_coef=0.001869 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 14540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-406435.4 mean_steps=14.8
|
|
[Episode 14550] reward=-44731739.8 actor_loss=0.0907 critic_loss=119893275079.1111 entropy=3.3447 ent_coef=0.001869 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 14560] reward=-69243573.4 actor_loss=0.1103 critic_loss=137976099635.2000 entropy=3.3481 ent_coef=0.001869 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 14560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501040.5 mean_steps=13.1
|
|
[Episode 14570] reward=-59405350.3 actor_loss=0.1343 critic_loss=129562222592.0000 entropy=3.3473 ent_coef=0.001869 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 14580] reward=-52255395.0 actor_loss=0.1342 critic_loss=120643213019.4286 entropy=3.3474 ent_coef=0.001869 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 14580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-444662.0 mean_steps=12.9
|
|
[Episode 14590] reward=-52858903.9 actor_loss=0.1046 critic_loss=122331811020.8000 entropy=3.3476 ent_coef=0.001869 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 14600] reward=-50584215.7 actor_loss=0.1115 critic_loss=121394411747.5556 entropy=3.3482 ent_coef=0.001869 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 14600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-499575.7 mean_steps=12.6
|
|
[Episode 14610] reward=-55574264.9 actor_loss=0.1079 critic_loss=122593921316.5714 entropy=3.3488 ent_coef=0.001869 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 14620] reward=-43880708.4 actor_loss=0.0978 critic_loss=117837771869.0909 entropy=3.3454 ent_coef=0.001868 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Eval 14620] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-407587.4 mean_steps=14.1
|
|
[Episode 14630] reward=-55689068.2 actor_loss=0.1000 critic_loss=122546510506.6667 entropy=3.3451 ent_coef=0.001868 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 14640] reward=-51299910.4 actor_loss=0.1138 critic_loss=124054627564.3077 entropy=3.3458 ent_coef=0.001868 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 14640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-291310.9 mean_steps=15.0
|
|
[Episode 14650] reward=-47066216.8 actor_loss=0.1281 critic_loss=120111875072.0000 entropy=3.3468 ent_coef=0.001868 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 14660] reward=-52411573.3 actor_loss=0.1001 critic_loss=125533396992.0000 entropy=3.3456 ent_coef=0.001868 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 14660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-507800.4 mean_steps=12.2
|
|
[Episode 14670] reward=-53051185.8 actor_loss=0.1131 critic_loss=124680893533.0909 entropy=3.3467 ent_coef=0.001868 approx_kl=0.0011 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 14680] reward=-48541334.4 actor_loss=0.1145 critic_loss=125335967451.4286 entropy=3.3404 ent_coef=0.001868 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 14680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-522956.9 mean_steps=12.4
|
|
[Episode 14690] reward=-46868042.9 actor_loss=0.0856 critic_loss=120308421725.0909 entropy=3.3389 ent_coef=0.001868 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 14700] reward=-55551175.4 actor_loss=0.0955 critic_loss=124445859840.0000 entropy=3.3384 ent_coef=0.001868 approx_kl=0.0001 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 14700] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-580343.0 mean_steps=11.9
|
|
[Episode 14710] reward=-49054913.4 actor_loss=0.1097 critic_loss=121027811328.0000 entropy=3.3361 ent_coef=0.001868 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 14720] reward=-63006721.9 actor_loss=0.1045 critic_loss=127748038656.0000 entropy=3.3347 ent_coef=0.001868 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 14720] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-655413.8 mean_steps=11.3
|
|
[Episode 14730] reward=-55634604.9 actor_loss=0.1208 critic_loss=127701852160.0000 entropy=3.3357 ent_coef=0.001867 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 14740] reward=-46102548.6 actor_loss=0.0981 critic_loss=120829335779.5556 entropy=3.3298 ent_coef=0.001867 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 14740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-487606.8 mean_steps=12.9
|
|
[Episode 14750] reward=-53959602.6 actor_loss=0.1184 critic_loss=126144409227.6364 entropy=3.3277 ent_coef=0.001867 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 14760] reward=-57899404.8 actor_loss=0.1293 critic_loss=126444978176.0000 entropy=3.3241 ent_coef=0.001867 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 14760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-493283.3 mean_steps=12.9
|
|
[Episode 14770] reward=-55485741.2 actor_loss=0.1198 critic_loss=125078919577.6000 entropy=3.3266 ent_coef=0.001867 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 14780] reward=-45945024.1 actor_loss=0.1090 critic_loss=119892188091.7333 entropy=3.3315 ent_coef=0.001867 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 14780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-437954.5 mean_steps=13.2
|
|
[Episode 14790] reward=-51700664.6 actor_loss=0.1094 critic_loss=119692605147.4286 entropy=3.3312 ent_coef=0.001867 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 14800] reward=-52403572.3 actor_loss=0.1195 critic_loss=122291140315.4286 entropy=3.3324 ent_coef=0.001867 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 14800] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550157.9 mean_steps=12.6
|
|
[Episode 14810] reward=-45293605.5 actor_loss=0.1123 critic_loss=123867811840.0000 entropy=3.3313 ent_coef=0.001867 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Episode 14820] reward=-54732850.8 actor_loss=0.0803 critic_loss=125133821952.0000 entropy=3.3313 ent_coef=0.001867 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 14820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-369307.2 mean_steps=13.8
|
|
[Episode 14830] reward=-53145272.2 actor_loss=0.1104 critic_loss=120410173440.0000 entropy=3.3326 ent_coef=0.001867 approx_kl=0.0005 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 14840] reward=-40183975.2 actor_loss=0.1217 critic_loss=117530184704.0000 entropy=3.3315 ent_coef=0.001866 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 14840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469863.3 mean_steps=13.6
|
|
[Episode 14850] reward=-64162720.4 actor_loss=0.1201 critic_loss=126826632533.3333 entropy=3.3366 ent_coef=0.001866 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 14860] reward=-40973673.5 actor_loss=0.1083 critic_loss=115500485290.6667 entropy=3.3367 ent_coef=0.001866 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 14860] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569012.6 mean_steps=12.8
|
|
[Episode 14870] reward=-50070956.1 actor_loss=0.1172 critic_loss=119627617621.3333 entropy=3.3367 ent_coef=0.001866 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 14880] reward=-52031149.4 actor_loss=0.1429 critic_loss=124174566006.1538 entropy=3.3394 ent_coef=0.001866 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 14880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-456845.3 mean_steps=13.2
|
|
[Episode 14890] reward=-50274257.2 actor_loss=0.1088 critic_loss=123295343336.7273 entropy=3.3400 ent_coef=0.001866 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 14900] reward=-56159174.4 actor_loss=0.1310 critic_loss=128355648658.2857 entropy=3.3374 ent_coef=0.001866 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 14900] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-338626.2 mean_steps=14.8
|
|
[Episode 14910] reward=-53242795.0 actor_loss=0.1142 critic_loss=125295441510.4000 entropy=3.3356 ent_coef=0.001866 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 14920] reward=-58111306.6 actor_loss=0.1280 critic_loss=127824286281.1429 entropy=3.3339 ent_coef=0.001866 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 14920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-500666.8 mean_steps=13.1
|
|
[Episode 14930] reward=-54715467.5 actor_loss=0.1208 critic_loss=124885592064.0000 entropy=3.3341 ent_coef=0.001866 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 14940] reward=-49753686.7 actor_loss=0.1356 critic_loss=127586718515.2000 entropy=3.3304 ent_coef=0.001866 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 14940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536593.6 mean_steps=12.7
|
|
[Episode 14950] reward=-54978930.3 actor_loss=0.1319 critic_loss=124404254906.1818 entropy=3.3333 ent_coef=0.001865 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 14960] reward=-58051255.5 actor_loss=0.1217 critic_loss=130755776512.0000 entropy=3.3310 ent_coef=0.001865 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 14960] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-488298.3 mean_steps=12.2
|
|
[Episode 14970] reward=-63978149.5 actor_loss=0.1244 critic_loss=128392794112.0000 entropy=3.3293 ent_coef=0.001865 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Episode 14980] reward=-48718618.5 actor_loss=0.1221 critic_loss=123540285626.1818 entropy=3.3273 ent_coef=0.001865 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 14980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-603498.0 mean_steps=11.6
|
|
[Episode 14990] reward=-49865706.7 actor_loss=0.1091 critic_loss=121811513344.0000 entropy=3.3239 ent_coef=0.001865 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 15000] reward=-46820016.2 actor_loss=0.1199 critic_loss=118920699904.0000 entropy=3.3225 ent_coef=0.001865 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 15000] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-397834.6 mean_steps=14.6
|
|
[Episode 15010] reward=-54421159.5 actor_loss=0.1109 critic_loss=124503296292.5714 entropy=3.3214 ent_coef=0.001865 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 15020] reward=-56845696.4 actor_loss=0.1359 critic_loss=127168187830.8571 entropy=3.3211 ent_coef=0.001865 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 15020] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-303141.3 mean_steps=14.6
|
|
[Episode 15030] reward=-52363979.6 actor_loss=0.1158 critic_loss=125206206171.4286 entropy=3.3201 ent_coef=0.001865 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 15040] reward=-59026660.1 actor_loss=0.1132 critic_loss=129370247623.1111 entropy=3.3226 ent_coef=0.001865 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 15040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-549130.4 mean_steps=11.8
|
|
[Episode 15050] reward=-55056880.5 actor_loss=0.1324 critic_loss=123780491264.0000 entropy=3.3271 ent_coef=0.001865 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 15060] reward=-66514093.8 actor_loss=0.1537 critic_loss=133070848000.0000 entropy=3.3282 ent_coef=0.001864 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Eval 15060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-540728.0 mean_steps=13.2
|
|
[Episode 15070] reward=-53437209.4 actor_loss=0.1223 critic_loss=123771689642.6667 entropy=3.3243 ent_coef=0.001864 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 15080] reward=-56369639.4 actor_loss=0.1237 critic_loss=126342011835.7333 entropy=3.3224 ent_coef=0.001864 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 15080] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-579073.4 mean_steps=11.0
|
|
[Episode 15090] reward=-57886088.4 actor_loss=0.1227 critic_loss=124529048234.6667 entropy=3.3226 ent_coef=0.001864 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 15100] reward=-61417048.7 actor_loss=0.1367 critic_loss=136062370816.0000 entropy=3.3234 ent_coef=0.001864 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 15100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-553104.3 mean_steps=11.8
|
|
[Episode 15110] reward=-44101272.7 actor_loss=0.1121 critic_loss=116152147968.0000 entropy=3.3251 ent_coef=0.001864 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 15120] reward=-64654820.8 actor_loss=0.1554 critic_loss=128405484885.3333 entropy=3.3257 ent_coef=0.001864 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 15120] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-339170.0 mean_steps=14.8
|
|
[Episode 15130] reward=-55061161.4 actor_loss=0.1211 critic_loss=124783608490.6667 entropy=3.3247 ent_coef=0.001864 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 15140] reward=-56652935.6 actor_loss=0.1081 critic_loss=128786571946.6667 entropy=3.3222 ent_coef=0.001864 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 15140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-580391.8 mean_steps=13.3
|
|
[Episode 15150] reward=-50681871.9 actor_loss=0.1230 critic_loss=124335521792.0000 entropy=3.3224 ent_coef=0.001864 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 15160] reward=-51193863.0 actor_loss=0.1416 critic_loss=123043883008.0000 entropy=3.3194 ent_coef=0.001864 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 15160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550039.2 mean_steps=12.6
|
|
[Episode 15170] reward=-48702635.6 actor_loss=0.1181 critic_loss=122746017792.0000 entropy=3.3187 ent_coef=0.001863 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 15180] reward=-54555339.7 actor_loss=0.0898 critic_loss=129018043245.7143 entropy=3.3206 ent_coef=0.001863 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 15180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-381327.7 mean_steps=14.4
|
|
[Episode 15190] reward=-63973031.6 actor_loss=0.1432 critic_loss=128551749768.5333 entropy=3.3187 ent_coef=0.001863 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Episode 15200] reward=-60556887.7 actor_loss=0.1433 critic_loss=130215250602.6667 entropy=3.3226 ent_coef=0.001863 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Eval 15200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-495169.3 mean_steps=12.8
|
|
[Episode 15210] reward=-46884018.5 actor_loss=0.1078 critic_loss=120803983945.1429 entropy=3.3215 ent_coef=0.001863 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 15220] reward=-49074579.9 actor_loss=0.1095 critic_loss=120392654848.0000 entropy=3.3180 ent_coef=0.001863 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 15220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-354477.0 mean_steps=14.0
|
|
[Episode 15230] reward=-62596481.5 actor_loss=0.1481 critic_loss=125098427538.2857 entropy=3.3159 ent_coef=0.001863 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Episode 15240] reward=-61197839.3 actor_loss=0.1130 critic_loss=131038592204.8000 entropy=3.3129 ent_coef=0.001863 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 15240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-525647.2 mean_steps=12.0
|
|
[Episode 15250] reward=-60225781.5 actor_loss=0.1253 critic_loss=127301582217.8462 entropy=3.3138 ent_coef=0.001863 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 15260] reward=-56918986.8 actor_loss=0.1576 critic_loss=125157159731.2000 entropy=3.3134 ent_coef=0.001863 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 15260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-452016.4 mean_steps=12.4
|
|
[Episode 15270] reward=-63458876.7 actor_loss=0.1456 critic_loss=133830123520.0000 entropy=3.3114 ent_coef=0.001863 approx_kl=-0.0008 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 15280] reward=-46815932.4 actor_loss=0.1211 critic_loss=121655680393.8462 entropy=3.3093 ent_coef=0.001862 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 15280] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-590772.5 mean_steps=12.8
|
|
[Episode 15290] reward=-52202490.5 actor_loss=0.1202 critic_loss=124352854425.6000 entropy=3.3093 ent_coef=0.001862 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 15300] reward=-59786375.5 actor_loss=0.1362 critic_loss=127650308096.0000 entropy=3.3117 ent_coef=0.001862 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 15300] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-339629.4 mean_steps=14.9
|
|
[Episode 15310] reward=-47612461.6 actor_loss=0.0929 critic_loss=121844640972.8000 entropy=3.3123 ent_coef=0.001862 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 15320] reward=-45079945.2 actor_loss=0.0771 critic_loss=117777714614.8571 entropy=3.3121 ent_coef=0.001862 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 15320] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-368470.0 mean_steps=14.5
|
|
[Episode 15330] reward=-52250598.1 actor_loss=0.1112 critic_loss=126246685696.0000 entropy=3.3107 ent_coef=0.001862 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0677 front_blocked=0
|
|
[Episode 15340] reward=-52478495.6 actor_loss=0.1211 critic_loss=123528965120.0000 entropy=3.3105 ent_coef=0.001862 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 15340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-402103.1 mean_steps=12.9
|
|
[Episode 15350] reward=-46570609.5 actor_loss=0.1133 critic_loss=121624276992.0000 entropy=3.3126 ent_coef=0.001862 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 15360] reward=-48151993.0 actor_loss=0.1387 critic_loss=119714402759.1111 entropy=3.3106 ent_coef=0.001862 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 15360] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-634912.5 mean_steps=11.5
|
|
[Episode 15370] reward=-48466021.7 actor_loss=0.0974 critic_loss=123065149440.0000 entropy=3.3066 ent_coef=0.001862 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 15380] reward=-62492377.4 actor_loss=0.1273 critic_loss=131207081984.0000 entropy=3.3040 ent_coef=0.001862 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 15380] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-328852.2 mean_steps=15.7
|
|
[Episode 15390] reward=-53624456.0 actor_loss=0.1249 critic_loss=122974304483.5556 entropy=3.2983 ent_coef=0.001861 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 15400] reward=-52288805.1 actor_loss=0.1051 critic_loss=124585431859.2000 entropy=3.2955 ent_coef=0.001861 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 15400] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-362427.1 mean_steps=15.2
|
|
[Episode 15410] reward=-52889834.1 actor_loss=0.0921 critic_loss=123763854242.9091 entropy=3.2978 ent_coef=0.001861 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 15420] reward=-49906963.7 actor_loss=0.1201 critic_loss=124481218150.4000 entropy=3.2977 ent_coef=0.001861 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 15420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-390285.5 mean_steps=14.1
|
|
[Episode 15430] reward=-44874596.5 actor_loss=0.0998 critic_loss=120458235904.0000 entropy=3.2978 ent_coef=0.001861 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 15440] reward=-55917038.9 actor_loss=0.1411 critic_loss=123172379306.6667 entropy=3.2985 ent_coef=0.001861 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 15440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-535681.9 mean_steps=13.2
|
|
[Episode 15450] reward=-47185484.7 actor_loss=0.1154 critic_loss=119509882197.3333 entropy=3.2974 ent_coef=0.001861 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 15460] reward=-40487893.9 actor_loss=0.1165 critic_loss=115058621293.7143 entropy=3.2973 ent_coef=0.001861 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 15460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-540443.6 mean_steps=12.6
|
|
[Episode 15470] reward=-60026569.0 actor_loss=0.0884 critic_loss=125211867136.0000 entropy=3.2971 ent_coef=0.001861 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 15480] reward=-58175415.5 actor_loss=0.1217 critic_loss=125781519564.8000 entropy=3.2943 ent_coef=0.001861 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 15480] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-203262.3 mean_steps=16.2
|
|
[Episode 15490] reward=-51753252.7 actor_loss=0.1447 critic_loss=129400395093.3333 entropy=3.2953 ent_coef=0.001861 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 15500] reward=-48481589.8 actor_loss=0.1230 critic_loss=121083186380.8000 entropy=3.2967 ent_coef=0.001861 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 15500] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-291629.9 mean_steps=15.2
|
|
[Episode 15510] reward=-47316449.3 actor_loss=0.1224 critic_loss=120805479765.3333 entropy=3.2948 ent_coef=0.001860 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 15520] reward=-49778969.2 actor_loss=0.0791 critic_loss=122110359259.4286 entropy=3.2932 ent_coef=0.001860 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 15520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-394370.0 mean_steps=14.4
|
|
[Episode 15530] reward=-52172804.0 actor_loss=0.1179 critic_loss=125595554247.1111 entropy=3.2942 ent_coef=0.001860 approx_kl=-0.0000 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 15540] reward=-52287676.8 actor_loss=0.0823 critic_loss=123583366290.2857 entropy=3.2957 ent_coef=0.001860 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 15540] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399512.8 mean_steps=13.7
|
|
[Episode 15550] reward=-64906813.3 actor_loss=0.1440 critic_loss=131652995364.5714 entropy=3.2950 ent_coef=0.001860 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Episode 15560] reward=-50875363.0 actor_loss=0.1405 critic_loss=121375170560.0000 entropy=3.2933 ent_coef=0.001860 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 15560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-458232.9 mean_steps=13.3
|
|
[Episode 15570] reward=-52833127.8 actor_loss=0.1264 critic_loss=125468243781.8182 entropy=3.2945 ent_coef=0.001860 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 15580] reward=-49425790.3 actor_loss=0.1210 critic_loss=120790271441.4545 entropy=3.2936 ent_coef=0.001860 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 15580] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-560498.7 mean_steps=12.6
|
|
[Episode 15590] reward=-52541996.9 actor_loss=0.1047 critic_loss=121930419768.8889 entropy=3.2925 ent_coef=0.001860 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 15600] reward=-58442299.9 actor_loss=0.1168 critic_loss=129123187097.6000 entropy=3.2923 ent_coef=0.001860 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 15600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-356176.9 mean_steps=13.6
|
|
[Episode 15610] reward=-59549725.8 actor_loss=0.1188 critic_loss=127972711765.3333 entropy=3.2893 ent_coef=0.001860 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 15620] reward=-55634908.8 actor_loss=0.1117 critic_loss=129377327104.0000 entropy=3.2888 ent_coef=0.001859 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Eval 15620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479350.0 mean_steps=12.8
|
|
[Episode 15630] reward=-56221893.2 actor_loss=0.1259 critic_loss=126351190016.0000 entropy=3.2865 ent_coef=0.001859 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 15640] reward=-52754482.3 actor_loss=0.1449 critic_loss=120859508736.0000 entropy=3.2851 ent_coef=0.001859 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 15640] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563159.0 mean_steps=12.8
|
|
[Episode 15650] reward=-54557279.3 actor_loss=0.1164 critic_loss=128469977497.6000 entropy=3.2852 ent_coef=0.001859 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 15660] reward=-59772997.8 actor_loss=0.1126 critic_loss=127789829324.8000 entropy=3.2863 ent_coef=0.001859 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 15660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-454206.1 mean_steps=12.5
|
|
[Episode 15670] reward=-42958157.0 actor_loss=0.1217 critic_loss=121413521993.1429 entropy=3.2858 ent_coef=0.001859 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 15680] reward=-56000047.9 actor_loss=0.1079 critic_loss=128015709525.3333 entropy=3.2839 ent_coef=0.001859 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 15680] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-296472.0 mean_steps=16.1
|
|
[Episode 15690] reward=-56262854.2 actor_loss=0.1040 critic_loss=123131090944.0000 entropy=3.2815 ent_coef=0.001859 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 15700] reward=-59542751.1 actor_loss=0.1196 critic_loss=128170101028.5714 entropy=3.2818 ent_coef=0.001859 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 15700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-387642.1 mean_steps=13.9
|
|
[Episode 15710] reward=-43240397.9 actor_loss=0.0726 critic_loss=118777019050.6667 entropy=3.2827 ent_coef=0.001859 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Episode 15720] reward=-58749423.9 actor_loss=0.1480 critic_loss=128585072640.0000 entropy=3.2811 ent_coef=0.001859 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 15720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-437684.6 mean_steps=13.9
|
|
[Episode 15730] reward=-64039688.3 actor_loss=0.1660 critic_loss=129188087808.0000 entropy=3.2797 ent_coef=0.001858 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0990 front_blocked=0
|
|
[Episode 15740] reward=-54806973.0 actor_loss=0.1576 critic_loss=121668972088.8889 entropy=3.2770 ent_coef=0.001858 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 15740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-421519.8 mean_steps=14.6
|
|
[Episode 15750] reward=-53264361.5 actor_loss=0.1193 critic_loss=124389110877.0909 entropy=3.2753 ent_coef=0.001858 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 15760] reward=-55981102.6 actor_loss=0.1363 critic_loss=126966081945.6000 entropy=3.2767 ent_coef=0.001858 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 15760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-393762.3 mean_steps=13.9
|
|
[Episode 15770] reward=-54653300.9 actor_loss=0.1114 critic_loss=127257017457.7778 entropy=3.2755 ent_coef=0.001858 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 15780] reward=-53676075.2 actor_loss=0.1192 critic_loss=119612946432.0000 entropy=3.2750 ent_coef=0.001858 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 15780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455555.6 mean_steps=13.4
|
|
[Episode 15790] reward=-51602656.1 actor_loss=0.1128 critic_loss=123173992155.4286 entropy=3.2730 ent_coef=0.001858 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 15800] reward=-50684932.2 actor_loss=0.1220 critic_loss=120052507534.2222 entropy=3.2711 ent_coef=0.001858 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 15800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399942.3 mean_steps=13.7
|
|
[Episode 15810] reward=-52185285.9 actor_loss=0.1175 critic_loss=127247212544.0000 entropy=3.2695 ent_coef=0.001858 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 15820] reward=-52553970.9 actor_loss=0.1156 critic_loss=125817200640.0000 entropy=3.2696 ent_coef=0.001858 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 15820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-525594.4 mean_steps=12.6
|
|
[Episode 15830] reward=-58639073.8 actor_loss=0.1695 critic_loss=126725563278.2222 entropy=3.2693 ent_coef=0.001858 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Episode 15840] reward=-66087113.7 actor_loss=0.1160 critic_loss=134408769536.0000 entropy=3.2718 ent_coef=0.001857 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 15840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-611580.1 mean_steps=11.4
|
|
[Episode 15850] reward=-47306045.8 actor_loss=0.1060 critic_loss=117405118008.8889 entropy=3.2670 ent_coef=0.001857 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 15860] reward=-60247708.2 actor_loss=0.1209 critic_loss=126850209109.3333 entropy=3.2651 ent_coef=0.001857 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 15860] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-435952.2 mean_steps=13.3
|
|
[Episode 15870] reward=-46872725.1 actor_loss=0.1190 critic_loss=118582895762.2857 entropy=3.2625 ent_coef=0.001857 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 15880] reward=-59756746.3 actor_loss=0.1039 critic_loss=129707556864.0000 entropy=3.2592 ent_coef=0.001857 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 15880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-410123.2 mean_steps=13.8
|
|
[Episode 15890] reward=-46076804.7 actor_loss=0.0753 critic_loss=120480050380.8000 entropy=3.2541 ent_coef=0.001857 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 15900] reward=-63692711.3 actor_loss=0.1424 critic_loss=126988042240.0000 entropy=3.2524 ent_coef=0.001857 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Eval 15900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509736.0 mean_steps=13.1
|
|
[Episode 15910] reward=-53793288.7 actor_loss=0.1066 critic_loss=123910701056.0000 entropy=3.2538 ent_coef=0.001857 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 15920] reward=-68470256.2 actor_loss=0.1330 critic_loss=138125865779.2000 entropy=3.2514 ent_coef=0.001857 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 15920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-476390.8 mean_steps=13.4
|
|
[Episode 15930] reward=-61497949.0 actor_loss=0.1614 critic_loss=131315955712.0000 entropy=3.2541 ent_coef=0.001857 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Episode 15940] reward=-61093863.9 actor_loss=0.1283 critic_loss=127772264448.0000 entropy=3.2535 ent_coef=0.001857 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 15940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-482924.2 mean_steps=13.5
|
|
[Episode 15950] reward=-51771837.2 actor_loss=0.1253 critic_loss=122131154716.4444 entropy=3.2521 ent_coef=0.001856 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Episode 15960] reward=-49628683.3 actor_loss=0.1000 critic_loss=122440006656.0000 entropy=3.2520 ent_coef=0.001856 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 15960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477823.1 mean_steps=13.9
|
|
[Episode 15970] reward=-56391912.8 actor_loss=0.1581 critic_loss=121121619057.7778 entropy=3.2490 ent_coef=0.001856 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 15980] reward=-51889894.2 actor_loss=0.1533 critic_loss=121899305642.6667 entropy=3.2478 ent_coef=0.001856 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Eval 15980] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-313870.3 mean_steps=15.4
|
|
[Episode 15990] reward=-49624455.8 actor_loss=0.1158 critic_loss=123909315584.0000 entropy=3.2483 ent_coef=0.001856 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 16000] reward=-52053564.3 actor_loss=0.1346 critic_loss=123239912789.3333 entropy=3.2436 ent_coef=0.001856 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 16000] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-646629.0 mean_steps=10.7
|
|
[Episode 16010] reward=-59150511.1 actor_loss=0.1274 critic_loss=125675186858.6667 entropy=3.2410 ent_coef=0.001856 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Episode 16020] reward=-49222664.1 actor_loss=0.0960 critic_loss=121849570450.2857 entropy=3.2395 ent_coef=0.001856 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 16020] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-485442.9 mean_steps=12.2
|
|
[Episode 16030] reward=-43712431.9 actor_loss=0.1457 critic_loss=119198400512.0000 entropy=3.2399 ent_coef=0.001856 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 16040] reward=-67050325.1 actor_loss=0.1685 critic_loss=130261909504.0000 entropy=3.2393 ent_coef=0.001856 approx_kl=0.0041 kl_stop=1 intervention_rate=0.1029 front_blocked=0
|
|
[Eval 16040] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-377079.4 mean_steps=14.4
|
|
[Episode 16050] reward=-48539244.5 actor_loss=0.1157 critic_loss=119005807336.7273 entropy=3.2360 ent_coef=0.001856 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 16060] reward=-48318616.9 actor_loss=0.1295 critic_loss=118175609366.2609 entropy=3.2357 ent_coef=0.001855 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 16060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397816.0 mean_steps=13.8
|
|
[Episode 16070] reward=-58160874.9 actor_loss=0.1345 critic_loss=124977642837.3333 entropy=3.2320 ent_coef=0.001855 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 16080] reward=-47121345.6 actor_loss=0.0959 critic_loss=117865631158.8571 entropy=3.2298 ent_coef=0.001855 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 16080] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-412060.4 mean_steps=14.7
|
|
[Episode 16090] reward=-51758083.8 actor_loss=0.1046 critic_loss=122868918564.5714 entropy=3.2289 ent_coef=0.001855 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 16100] reward=-61211588.9 actor_loss=0.1244 critic_loss=129055956992.0000 entropy=3.2235 ent_coef=0.001855 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 16100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-379045.5 mean_steps=13.5
|
|
[Episode 16110] reward=-50752843.9 actor_loss=0.0968 critic_loss=122003300352.0000 entropy=3.2229 ent_coef=0.001855 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 16120] reward=-57185517.8 actor_loss=0.1012 critic_loss=124373364736.0000 entropy=3.2230 ent_coef=0.001855 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 16120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-544839.5 mean_steps=13.1
|
|
[Episode 16130] reward=-52803379.1 actor_loss=0.1930 critic_loss=122686869504.0000 entropy=3.2230 ent_coef=0.001855 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 16140] reward=-48613246.4 actor_loss=0.1291 critic_loss=124353195008.0000 entropy=3.2213 ent_coef=0.001855 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 16140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-369849.0 mean_steps=14.6
|
|
[Episode 16150] reward=-51589244.4 actor_loss=0.0988 critic_loss=122628376985.6000 entropy=3.2176 ent_coef=0.001855 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 16160] reward=-52349447.7 actor_loss=0.1118 critic_loss=125443730432.0000 entropy=3.2134 ent_coef=0.001855 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 16160] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-321049.6 mean_steps=15.6
|
|
[Episode 16170] reward=-52493918.9 actor_loss=0.1055 critic_loss=124213633024.0000 entropy=3.2125 ent_coef=0.001854 approx_kl=0.0005 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 16180] reward=-59494662.2 actor_loss=0.1217 critic_loss=126258607104.0000 entropy=3.2133 ent_coef=0.001854 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 16180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501748.9 mean_steps=13.1
|
|
[Episode 16190] reward=-53594280.4 actor_loss=0.0979 critic_loss=120326067541.3333 entropy=3.2106 ent_coef=0.001854 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 16200] reward=-42071376.1 actor_loss=0.0937 critic_loss=117911143424.0000 entropy=3.2118 ent_coef=0.001854 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 16200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-535575.1 mean_steps=12.1
|
|
[Episode 16210] reward=-48267119.4 actor_loss=0.0903 critic_loss=119889916359.1111 entropy=3.2085 ent_coef=0.001854 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 16220] reward=-49351615.2 actor_loss=0.1156 critic_loss=119615950392.8889 entropy=3.2064 ent_coef=0.001854 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 16220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561705.3 mean_steps=12.7
|
|
[Episode 16230] reward=-49468041.3 actor_loss=0.1008 critic_loss=119360795443.2000 entropy=3.2065 ent_coef=0.001854 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 16240] reward=-53091488.1 actor_loss=0.1235 critic_loss=123773150208.0000 entropy=3.2057 ent_coef=0.001854 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 16240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537145.5 mean_steps=12.4
|
|
[Episode 16250] reward=-51460567.7 actor_loss=0.1381 critic_loss=124261959680.0000 entropy=3.2060 ent_coef=0.001854 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 16260] reward=-43142008.5 actor_loss=0.1414 critic_loss=119266711552.0000 entropy=3.2056 ent_coef=0.001854 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 16260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-479228.2 mean_steps=12.6
|
|
[Episode 16270] reward=-51860698.6 actor_loss=0.1341 critic_loss=121187914043.0769 entropy=3.2057 ent_coef=0.001854 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Episode 16280] reward=-46959767.3 actor_loss=0.1206 critic_loss=121398654683.4286 entropy=3.2070 ent_coef=0.001853 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 16280] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-325890.8 mean_steps=14.2
|
|
[Episode 16290] reward=-58063385.2 actor_loss=0.1161 critic_loss=127956536139.2941 entropy=3.2093 ent_coef=0.001853 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 16300] reward=-51890477.3 actor_loss=0.1133 critic_loss=120975085568.0000 entropy=3.2040 ent_coef=0.001853 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 16300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-509701.4 mean_steps=12.2
|
|
[Episode 16310] reward=-61694936.7 actor_loss=0.1433 critic_loss=132973584930.1333 entropy=3.2024 ent_coef=0.001853 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 16320] reward=-45149675.9 actor_loss=0.1091 critic_loss=123195435690.6667 entropy=3.2011 ent_coef=0.001853 approx_kl=0.0006 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 16320] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-597712.6 mean_steps=11.2
|
|
[Episode 16330] reward=-60294741.5 actor_loss=0.1247 critic_loss=127860404809.1429 entropy=3.2016 ent_coef=0.001853 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 16340] reward=-51484750.8 actor_loss=0.1294 critic_loss=119777610410.6667 entropy=3.2024 ent_coef=0.001853 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 16340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-434380.9 mean_steps=13.4
|
|
[Episode 16350] reward=-53836671.7 actor_loss=0.1018 critic_loss=122637890901.3333 entropy=3.2037 ent_coef=0.001853 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 16360] reward=-49869414.1 actor_loss=0.1491 critic_loss=119536125542.4000 entropy=3.2024 ent_coef=0.001853 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 16360] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-428506.8 mean_steps=13.9
|
|
[Episode 16370] reward=-52861086.7 actor_loss=0.1033 critic_loss=125545350290.2857 entropy=3.2015 ent_coef=0.001853 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 16380] reward=-50794898.3 actor_loss=0.1108 critic_loss=127526763178.6667 entropy=3.1986 ent_coef=0.001853 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 16380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-397438.9 mean_steps=13.9
|
|
[Episode 16390] reward=-50868503.5 actor_loss=0.1007 critic_loss=121564070798.2222 entropy=3.1989 ent_coef=0.001852 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 16400] reward=-56192708.4 actor_loss=0.1051 critic_loss=125332823319.2727 entropy=3.1999 ent_coef=0.001852 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 16400] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-364660.1 mean_steps=14.3
|
|
[Episode 16410] reward=-66088598.0 actor_loss=0.1050 critic_loss=129536794624.0000 entropy=3.1985 ent_coef=0.001852 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 16420] reward=-43182825.8 actor_loss=0.1137 critic_loss=117696249856.0000 entropy=3.1981 ent_coef=0.001852 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 16420] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-450716.3 mean_steps=13.3
|
|
[Episode 16430] reward=-43901253.8 actor_loss=0.1224 critic_loss=119003504640.0000 entropy=3.1965 ent_coef=0.001852 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 16440] reward=-51163914.3 actor_loss=0.0887 critic_loss=124922204160.0000 entropy=3.1958 ent_coef=0.001852 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Eval 16440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-524394.4 mean_steps=12.2
|
|
[Episode 16450] reward=-55757719.1 actor_loss=0.1080 critic_loss=125405624027.4286 entropy=3.1972 ent_coef=0.001852 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 16460] reward=-58119258.6 actor_loss=0.1163 critic_loss=123646789632.0000 entropy=3.1932 ent_coef=0.001852 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 16460] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-409836.9 mean_steps=14.6
|
|
[Episode 16470] reward=-43528931.7 actor_loss=0.1099 critic_loss=116350272853.3333 entropy=3.1925 ent_coef=0.001852 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 16480] reward=-56345191.8 actor_loss=0.1221 critic_loss=126981229226.6667 entropy=3.1929 ent_coef=0.001852 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 16480] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-423849.1 mean_steps=14.2
|
|
[Episode 16490] reward=-57908238.5 actor_loss=0.0754 critic_loss=125133200384.0000 entropy=3.1925 ent_coef=0.001852 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Episode 16500] reward=-53108984.8 actor_loss=0.1094 critic_loss=123663967232.0000 entropy=3.1925 ent_coef=0.001852 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 16500] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-632626.6 mean_steps=11.4
|
|
[Episode 16510] reward=-52131350.4 actor_loss=0.0978 critic_loss=126824777318.4000 entropy=3.1941 ent_coef=0.001851 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 16520] reward=-50998241.8 actor_loss=0.1066 critic_loss=120171211434.6667 entropy=3.1984 ent_coef=0.001851 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 16520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-533640.8 mean_steps=12.3
|
|
[Episode 16530] reward=-65613623.4 actor_loss=0.1103 critic_loss=134568818395.4286 entropy=3.1982 ent_coef=0.001851 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 16540] reward=-49273203.1 actor_loss=0.0849 critic_loss=120915813376.0000 entropy=3.1946 ent_coef=0.001851 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 16540] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-538825.7 mean_steps=11.7
|
|
[Episode 16550] reward=-61386726.3 actor_loss=0.1165 critic_loss=130723906166.1538 entropy=3.1971 ent_coef=0.001851 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 16560] reward=-63632829.4 actor_loss=0.1236 critic_loss=129000623445.3333 entropy=3.1991 ent_coef=0.001851 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 16560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455043.6 mean_steps=13.3
|
|
[Episode 16570] reward=-64940666.0 actor_loss=0.1139 critic_loss=132542629660.4444 entropy=3.1998 ent_coef=0.001851 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0996 front_blocked=0
|
|
[Episode 16580] reward=-64948874.6 actor_loss=0.1255 critic_loss=133042844020.3636 entropy=3.1980 ent_coef=0.001851 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 16580] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-325793.1 mean_steps=15.1
|
|
[Episode 16590] reward=-52104849.7 actor_loss=0.1288 critic_loss=116401610752.0000 entropy=3.1979 ent_coef=0.001851 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 16600] reward=-53383969.4 actor_loss=0.0854 critic_loss=122725367808.0000 entropy=3.1968 ent_coef=0.001851 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 16600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-552991.5 mean_steps=12.7
|
|
[Episode 16610] reward=-57500243.3 actor_loss=0.1097 critic_loss=128644881261.7143 entropy=3.1948 ent_coef=0.001851 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 16620] reward=-51093511.4 actor_loss=0.1120 critic_loss=123905479475.2000 entropy=3.1976 ent_coef=0.001850 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 16620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-443323.7 mean_steps=13.6
|
|
[Episode 16630] reward=-49033695.3 actor_loss=0.0973 critic_loss=121981613056.0000 entropy=3.1997 ent_coef=0.001850 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 16640] reward=-48109201.2 actor_loss=0.1073 critic_loss=120596088149.3333 entropy=3.1985 ent_coef=0.001850 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 16640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490613.3 mean_steps=12.8
|
|
[Episode 16650] reward=-57464074.3 actor_loss=0.1123 critic_loss=128462018373.8182 entropy=3.1968 ent_coef=0.001850 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 16660] reward=-48253224.1 actor_loss=0.1303 critic_loss=124209612572.4444 entropy=3.1962 ent_coef=0.001850 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 16660] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-415455.3 mean_steps=14.0
|
|
[Episode 16670] reward=-39882680.1 actor_loss=0.0841 critic_loss=118207085847.2727 entropy=3.1920 ent_coef=0.001850 approx_kl=0.0008 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 16680] reward=-52019341.6 actor_loss=0.1001 critic_loss=123541220010.6667 entropy=3.1891 ent_coef=0.001850 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 16680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453140.0 mean_steps=13.8
|
|
[Episode 16690] reward=-62987336.6 actor_loss=0.1055 critic_loss=132545054720.0000 entropy=3.1859 ent_coef=0.001850 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 16700] reward=-49864644.8 actor_loss=0.0991 critic_loss=121865302016.0000 entropy=3.1874 ent_coef=0.001850 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 16700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-432040.7 mean_steps=13.3
|
|
[Episode 16710] reward=-55984581.4 actor_loss=0.1132 critic_loss=125467140915.2000 entropy=3.1854 ent_coef=0.001850 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 16720] reward=-57155887.1 actor_loss=0.1404 critic_loss=129367880704.0000 entropy=3.1852 ent_coef=0.001850 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 16720] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-599145.0 mean_steps=12.1
|
|
[Episode 16730] reward=-48307819.9 actor_loss=0.1258 critic_loss=124976417177.6000 entropy=3.1808 ent_coef=0.001849 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 16740] reward=-60032325.7 actor_loss=0.1374 critic_loss=124862286116.5714 entropy=3.1793 ent_coef=0.001849 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 16740] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-383120.7 mean_steps=14.7
|
|
[Episode 16750] reward=-56361059.7 actor_loss=0.1204 critic_loss=128470753280.0000 entropy=3.1792 ent_coef=0.001849 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 16760] reward=-50649945.5 actor_loss=0.1531 critic_loss=120913666792.7273 entropy=3.1783 ent_coef=0.001849 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 16760] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436338.8 mean_steps=14.1
|
|
[Episode 16770] reward=-61229458.5 actor_loss=0.1167 critic_loss=129470507300.5714 entropy=3.1734 ent_coef=0.001849 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 16780] reward=-63177073.4 actor_loss=0.1370 critic_loss=128813213013.3333 entropy=3.1726 ent_coef=0.001849 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 16780] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-322395.1 mean_steps=15.7
|
|
[Episode 16790] reward=-65090201.1 actor_loss=0.1432 critic_loss=135800671436.8000 entropy=3.1749 ent_coef=0.001849 approx_kl=-0.0012 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Episode 16800] reward=-52950534.5 actor_loss=0.1254 critic_loss=123341243733.3333 entropy=3.1755 ent_coef=0.001849 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 16800] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459784.1 mean_steps=14.4
|
|
[Episode 16810] reward=-53346399.0 actor_loss=0.1007 critic_loss=116141256704.0000 entropy=3.1762 ent_coef=0.001849 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Episode 16820] reward=-50090633.6 actor_loss=0.1291 critic_loss=124263032149.3333 entropy=3.1744 ent_coef=0.001849 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 16820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506485.1 mean_steps=13.0
|
|
[Episode 16830] reward=-48823521.5 actor_loss=0.1193 critic_loss=119032897536.0000 entropy=3.1718 ent_coef=0.001849 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 16840] reward=-55229508.4 actor_loss=0.1298 critic_loss=123103136426.6667 entropy=3.1725 ent_coef=0.001848 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 16840] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-426566.5 mean_steps=12.7
|
|
[Episode 16850] reward=-57328287.0 actor_loss=0.1230 critic_loss=129402440908.8000 entropy=3.1711 ent_coef=0.001848 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 16860] reward=-49595984.3 actor_loss=0.1184 critic_loss=121341573802.6667 entropy=3.1757 ent_coef=0.001848 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 16860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462616.5 mean_steps=12.7
|
|
[Episode 16870] reward=-49543284.9 actor_loss=0.1033 critic_loss=123163388586.6667 entropy=3.1731 ent_coef=0.001848 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 16880] reward=-53104240.3 actor_loss=0.1347 critic_loss=124090804224.0000 entropy=3.1730 ent_coef=0.001848 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 16880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446370.2 mean_steps=13.5
|
|
[Episode 16890] reward=-38589424.3 actor_loss=0.1105 critic_loss=112165523911.1111 entropy=3.1753 ent_coef=0.001848 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 16900] reward=-53725653.6 actor_loss=0.1211 critic_loss=124151284882.2857 entropy=3.1752 ent_coef=0.001848 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 16900] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-492592.7 mean_steps=14.0
|
|
[Episode 16910] reward=-57529832.5 actor_loss=0.1439 critic_loss=124201709112.8889 entropy=3.1747 ent_coef=0.001848 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 16920] reward=-56497353.7 actor_loss=0.1084 critic_loss=125630453350.4000 entropy=3.1727 ent_coef=0.001848 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 16920] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426136.7 mean_steps=14.2
|
|
[Episode 16930] reward=-49759121.4 actor_loss=0.1251 critic_loss=117784843605.3333 entropy=3.1730 ent_coef=0.001848 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 16940] reward=-55601654.1 actor_loss=0.0896 critic_loss=124761307293.5385 entropy=3.1746 ent_coef=0.001848 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 16940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-497916.7 mean_steps=12.2
|
|
[Episode 16950] reward=-54381383.4 actor_loss=0.0997 critic_loss=122232709120.0000 entropy=3.1742 ent_coef=0.001847 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 16960] reward=-50980217.7 actor_loss=0.0964 critic_loss=119171837952.0000 entropy=3.1741 ent_coef=0.001847 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 16960] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477665.5 mean_steps=13.8
|
|
[Episode 16970] reward=-59055585.7 actor_loss=0.0767 critic_loss=122197274624.0000 entropy=3.1734 ent_coef=0.001847 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 16980] reward=-45527334.7 actor_loss=0.1224 critic_loss=116167081984.0000 entropy=3.1700 ent_coef=0.001847 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 16980] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-644110.3 mean_steps=10.7
|
|
[Episode 16990] reward=-52939381.5 actor_loss=0.1212 critic_loss=121543537664.0000 entropy=3.1698 ent_coef=0.001847 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 17000] reward=-58191229.5 actor_loss=0.1238 critic_loss=126487071402.6667 entropy=3.1696 ent_coef=0.001847 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 17000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-505653.4 mean_steps=12.2
|
|
[Episode 17010] reward=-67481297.6 actor_loss=0.1240 critic_loss=130600173568.0000 entropy=3.1678 ent_coef=0.001847 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 17020] reward=-52591881.3 actor_loss=0.1182 critic_loss=121103420074.6667 entropy=3.1675 ent_coef=0.001847 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 17020] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-584602.5 mean_steps=12.2
|
|
[Episode 17030] reward=-51221180.1 actor_loss=0.1146 critic_loss=120976829553.7778 entropy=3.1672 ent_coef=0.001847 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 17040] reward=-58375966.7 actor_loss=0.1791 critic_loss=126584030003.2000 entropy=3.1660 ent_coef=0.001847 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Eval 17040] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-560955.0 mean_steps=12.0
|
|
[Episode 17050] reward=-55916154.4 actor_loss=0.1029 critic_loss=124778479616.0000 entropy=3.1668 ent_coef=0.001847 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 17060] reward=-53604007.0 actor_loss=0.1021 critic_loss=124652665059.5556 entropy=3.1653 ent_coef=0.001846 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 17060] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-554090.4 mean_steps=11.2
|
|
[Episode 17070] reward=-53114285.7 actor_loss=0.1205 critic_loss=126562850133.3333 entropy=3.1622 ent_coef=0.001846 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 17080] reward=-55229564.4 actor_loss=0.1107 critic_loss=122527760384.0000 entropy=3.1620 ent_coef=0.001846 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 17080] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-440004.2 mean_steps=13.3
|
|
[Episode 17090] reward=-65115340.7 actor_loss=0.1406 critic_loss=127367850666.6667 entropy=3.1589 ent_coef=0.001846 approx_kl=0.0087 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 17100] reward=-57378277.9 actor_loss=0.1196 critic_loss=127075784021.3333 entropy=3.1582 ent_coef=0.001846 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 17100] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586773.0 mean_steps=11.8
|
|
[Episode 17110] reward=-50828431.5 actor_loss=0.1081 critic_loss=114904836778.6667 entropy=3.1587 ent_coef=0.001846 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 17120] reward=-63508562.8 actor_loss=0.1249 critic_loss=129266730598.4000 entropy=3.1603 ent_coef=0.001846 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 17120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-341488.5 mean_steps=13.4
|
|
[Episode 17130] reward=-56696181.2 actor_loss=0.1520 critic_loss=123086011392.0000 entropy=3.1589 ent_coef=0.001846 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 17140] reward=-65058530.6 actor_loss=0.1310 critic_loss=134304909994.6667 entropy=3.1600 ent_coef=0.001846 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Eval 17140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-546978.8 mean_steps=13.3
|
|
[Episode 17150] reward=-49879820.5 actor_loss=0.1362 critic_loss=122740465664.0000 entropy=3.1608 ent_coef=0.001846 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 17160] reward=-52869012.5 actor_loss=0.1051 critic_loss=124333942374.4000 entropy=3.1609 ent_coef=0.001846 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 17160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-462666.6 mean_steps=12.7
|
|
[Episode 17170] reward=-60622752.9 actor_loss=0.1323 critic_loss=129714483200.0000 entropy=3.1596 ent_coef=0.001845 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 17180] reward=-54487131.7 actor_loss=0.1015 critic_loss=124188992853.3333 entropy=3.1587 ent_coef=0.001845 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Eval 17180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-405542.9 mean_steps=14.8
|
|
[Episode 17190] reward=-48522999.6 actor_loss=0.1160 critic_loss=120489490841.6000 entropy=3.1582 ent_coef=0.001845 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 17200] reward=-59179254.5 actor_loss=0.1198 critic_loss=127531055786.6667 entropy=3.1579 ent_coef=0.001845 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 17200] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-379833.4 mean_steps=14.7
|
|
[Episode 17210] reward=-52718938.2 actor_loss=0.1245 critic_loss=120929136054.8571 entropy=3.1590 ent_coef=0.001845 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 17220] reward=-46124229.6 actor_loss=0.1029 critic_loss=121116198353.4545 entropy=3.1606 ent_coef=0.001845 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 17220] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-375067.5 mean_steps=14.7
|
|
[Episode 17230] reward=-49310598.7 actor_loss=0.1048 critic_loss=120610846310.4000 entropy=3.1624 ent_coef=0.001845 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 17240] reward=-58315098.1 actor_loss=0.1089 critic_loss=124351754240.0000 entropy=3.1629 ent_coef=0.001845 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 17240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-532880.5 mean_steps=12.4
|
|
[Episode 17250] reward=-50934160.3 actor_loss=0.1145 critic_loss=123318998357.3333 entropy=3.1635 ent_coef=0.001845 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 17260] reward=-55483321.3 actor_loss=0.1390 critic_loss=124070893795.5556 entropy=3.1634 ent_coef=0.001845 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 17260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-363347.3 mean_steps=13.9
|
|
[Episode 17270] reward=-50071441.4 actor_loss=0.1008 critic_loss=124391306035.2000 entropy=3.1623 ent_coef=0.001845 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 17280] reward=-67388423.2 actor_loss=0.1554 critic_loss=134969878528.0000 entropy=3.1617 ent_coef=0.001844 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 17280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-486108.1 mean_steps=13.2
|
|
[Episode 17290] reward=-58398825.7 actor_loss=0.1402 critic_loss=131949325750.8571 entropy=3.1614 ent_coef=0.001844 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 17300] reward=-62021836.6 actor_loss=0.0796 critic_loss=128712969557.3333 entropy=3.1619 ent_coef=0.001844 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 17300] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530650.2 mean_steps=12.4
|
|
[Episode 17310] reward=-56323691.1 actor_loss=0.1169 critic_loss=125192125293.7143 entropy=3.1608 ent_coef=0.001844 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 17320] reward=-56275182.3 actor_loss=0.1264 critic_loss=124166408874.6667 entropy=3.1609 ent_coef=0.001844 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 17320] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-450049.1 mean_steps=12.6
|
|
[Episode 17330] reward=-64980746.1 actor_loss=0.1374 critic_loss=127015572041.1429 entropy=3.1655 ent_coef=0.001844 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Episode 17340] reward=-62540683.4 actor_loss=0.1647 critic_loss=129689332736.0000 entropy=3.1658 ent_coef=0.001844 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 17340] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-552761.9 mean_steps=12.1
|
|
[Episode 17350] reward=-54184639.5 actor_loss=0.1115 critic_loss=124175602688.0000 entropy=3.1641 ent_coef=0.001844 approx_kl=0.0006 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 17360] reward=-59102164.4 actor_loss=0.1228 critic_loss=125542704274.2857 entropy=3.1616 ent_coef=0.001844 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 17360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-349742.4 mean_steps=14.7
|
|
[Episode 17370] reward=-57983660.8 actor_loss=0.1270 critic_loss=125692514304.0000 entropy=3.1601 ent_coef=0.001844 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0970 front_blocked=0
|
|
[Episode 17380] reward=-68933166.0 actor_loss=0.1324 critic_loss=132683464704.0000 entropy=3.1592 ent_coef=0.001844 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Eval 17380] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592501.8 mean_steps=11.8
|
|
[Episode 17390] reward=-49675612.0 actor_loss=0.1144 critic_loss=118699230354.2857 entropy=3.1578 ent_coef=0.001843 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 17400] reward=-64646287.7 actor_loss=0.1417 critic_loss=128911644672.0000 entropy=3.1605 ent_coef=0.001843 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Eval 17400] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-356576.4 mean_steps=15.3
|
|
[Episode 17410] reward=-52470802.0 actor_loss=0.1098 critic_loss=123711120849.4545 entropy=3.1595 ent_coef=0.001843 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 17420] reward=-45657898.4 actor_loss=0.1223 critic_loss=120781458636.8000 entropy=3.1606 ent_coef=0.001843 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 17420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-526571.8 mean_steps=12.6
|
|
[Episode 17430] reward=-51168562.6 actor_loss=0.1237 critic_loss=121322266624.0000 entropy=3.1621 ent_coef=0.001843 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 17440] reward=-43605985.3 actor_loss=0.1233 critic_loss=116505550848.0000 entropy=3.1583 ent_coef=0.001843 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 17440] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-627747.4 mean_steps=11.4
|
|
[Episode 17450] reward=-53889539.9 actor_loss=0.1208 critic_loss=127553678904.8889 entropy=3.1544 ent_coef=0.001843 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 17460] reward=-47748022.6 actor_loss=0.1251 critic_loss=121844430555.4286 entropy=3.1542 ent_coef=0.001843 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 17460] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-475077.3 mean_steps=12.8
|
|
[Episode 17470] reward=-58715808.5 actor_loss=0.1414 critic_loss=129110673700.5714 entropy=3.1535 ent_coef=0.001843 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 17480] reward=-57291503.4 actor_loss=0.1253 critic_loss=125183841280.0000 entropy=3.1529 ent_coef=0.001843 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 17480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-423718.4 mean_steps=13.6
|
|
[Episode 17490] reward=-41246791.1 actor_loss=0.1008 critic_loss=115149454449.7778 entropy=3.1551 ent_coef=0.001843 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0690 front_blocked=0
|
|
[Episode 17500] reward=-49252794.2 actor_loss=0.1200 critic_loss=120576679191.2727 entropy=3.1555 ent_coef=0.001843 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 17500] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-258750.5 mean_steps=15.5
|
|
[Episode 17510] reward=-59493110.3 actor_loss=0.1285 critic_loss=126119103926.8571 entropy=3.1554 ent_coef=0.001842 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0977 front_blocked=0
|
|
[Episode 17520] reward=-55176445.1 actor_loss=0.1129 critic_loss=122348562618.1818 entropy=3.1543 ent_coef=0.001842 approx_kl=0.0010 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Eval 17520] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-427741.5 mean_steps=13.6
|
|
[Episode 17530] reward=-57539856.4 actor_loss=0.0952 critic_loss=126625092949.3333 entropy=3.1541 ent_coef=0.001842 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 17540] reward=-48443959.6 actor_loss=0.1124 critic_loss=121102273536.0000 entropy=3.1509 ent_coef=0.001842 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 17540] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-344545.5 mean_steps=14.4
|
|
[Episode 17550] reward=-47029993.6 actor_loss=0.1362 critic_loss=121204744192.0000 entropy=3.1524 ent_coef=0.001842 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 17560] reward=-55613711.8 actor_loss=0.1132 critic_loss=127117470720.0000 entropy=3.1498 ent_coef=0.001842 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 17560] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-591812.0 mean_steps=12.2
|
|
[Episode 17570] reward=-48178550.0 actor_loss=0.1136 critic_loss=121917674837.3333 entropy=3.1497 ent_coef=0.001842 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 17580] reward=-70723233.3 actor_loss=0.1204 critic_loss=133773446348.8000 entropy=3.1470 ent_coef=0.001842 approx_kl=0.0034 kl_stop=1 intervention_rate=0.1042 front_blocked=0
|
|
[Eval 17580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-439933.5 mean_steps=12.6
|
|
[Episode 17590] reward=-43794578.1 actor_loss=0.1037 critic_loss=119701314764.8000 entropy=3.1466 ent_coef=0.001842 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 17600] reward=-54536319.5 actor_loss=0.1144 critic_loss=123404034048.0000 entropy=3.1442 ent_coef=0.001842 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 17600] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448034.0 mean_steps=13.8
|
|
[Episode 17610] reward=-46461802.5 actor_loss=0.1223 critic_loss=120638772019.2000 entropy=3.1431 ent_coef=0.001842 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 17620] reward=-54723170.8 actor_loss=0.0867 critic_loss=125418561536.0000 entropy=3.1391 ent_coef=0.001841 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 17620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-556081.1 mean_steps=12.8
|
|
[Episode 17630] reward=-41865450.6 actor_loss=0.0354 critic_loss=113370871125.3333 entropy=3.1329 ent_coef=0.001841 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 17640] reward=-50148363.5 actor_loss=0.1224 critic_loss=121997566464.0000 entropy=3.1307 ent_coef=0.001841 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 17640] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503523.9 mean_steps=13.0
|
|
[Episode 17650] reward=-44118319.3 actor_loss=0.1282 critic_loss=117663023104.0000 entropy=3.1286 ent_coef=0.001841 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Episode 17660] reward=-59721725.3 actor_loss=0.1092 critic_loss=126379065344.0000 entropy=3.1299 ent_coef=0.001841 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 17660] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-615446.5 mean_steps=11.6
|
|
[Episode 17670] reward=-48277924.5 actor_loss=0.1064 critic_loss=118656145635.5556 entropy=3.1263 ent_coef=0.001841 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 17680] reward=-54037355.1 actor_loss=0.0908 critic_loss=125797696398.2222 entropy=3.1257 ent_coef=0.001841 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 17680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-456270.6 mean_steps=13.7
|
|
[Episode 17690] reward=-46385394.1 actor_loss=0.0762 critic_loss=120063207610.1818 entropy=3.1260 ent_coef=0.001841 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Episode 17700] reward=-58173808.0 actor_loss=0.1070 critic_loss=127212057395.2000 entropy=3.1258 ent_coef=0.001841 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Eval 17700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-482679.8 mean_steps=14.6
|
|
[Episode 17710] reward=-58998248.5 actor_loss=0.1193 critic_loss=129699782656.0000 entropy=3.1243 ent_coef=0.001841 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 17720] reward=-54713604.3 actor_loss=0.1245 critic_loss=124649381888.0000 entropy=3.1209 ent_coef=0.001841 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 17720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-473504.2 mean_steps=12.8
|
|
[Episode 17730] reward=-58340479.9 actor_loss=0.1206 critic_loss=129297190912.0000 entropy=3.1199 ent_coef=0.001840 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 17740] reward=-52161222.3 actor_loss=0.1013 critic_loss=123815475200.0000 entropy=3.1214 ent_coef=0.001840 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 17740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511278.4 mean_steps=13.2
|
|
[Episode 17750] reward=-44255760.4 actor_loss=0.1158 critic_loss=119279467178.6667 entropy=3.1221 ent_coef=0.001840 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Episode 17760] reward=-61644164.8 actor_loss=0.1036 critic_loss=131119561581.7143 entropy=3.1208 ent_coef=0.001840 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Eval 17760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-506130.7 mean_steps=12.9
|
|
[Episode 17770] reward=-63991726.4 actor_loss=0.1595 critic_loss=126992116394.6667 entropy=3.1214 ent_coef=0.001840 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0964 front_blocked=0
|
|
[Episode 17780] reward=-61475611.0 actor_loss=0.1182 critic_loss=128522205184.0000 entropy=3.1180 ent_coef=0.001840 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 17780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-519427.7 mean_steps=12.3
|
|
[Episode 17790] reward=-67444743.7 actor_loss=0.1078 critic_loss=135039795200.0000 entropy=3.1164 ent_coef=0.001840 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Episode 17800] reward=-55883655.1 actor_loss=0.1195 critic_loss=122269210851.5556 entropy=3.1161 ent_coef=0.001840 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 17800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441045.2 mean_steps=13.2
|
|
[Episode 17810] reward=-60110766.1 actor_loss=0.1051 critic_loss=130658934784.0000 entropy=3.1177 ent_coef=0.001840 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 17820] reward=-50234254.2 actor_loss=0.2077 critic_loss=122850607104.0000 entropy=3.1188 ent_coef=0.001840 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 17820] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-448271.9 mean_steps=12.9
|
|
[Episode 17830] reward=-60164991.6 actor_loss=0.1139 critic_loss=126407130225.7778 entropy=3.1207 ent_coef=0.001840 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0951 front_blocked=0
|
|
[Episode 17840] reward=-45752416.4 actor_loss=0.0760 critic_loss=119394499697.7778 entropy=3.1236 ent_coef=0.001839 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0684 front_blocked=0
|
|
[Eval 17840] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-376235.9 mean_steps=14.6
|
|
[Episode 17850] reward=-69941768.7 actor_loss=0.1286 critic_loss=134905907882.6667 entropy=3.1252 ent_coef=0.001839 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 17860] reward=-47991663.9 actor_loss=0.0825 critic_loss=116626606080.0000 entropy=3.1222 ent_coef=0.001839 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Eval 17860] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-396086.9 mean_steps=14.0
|
|
[Episode 17870] reward=-53213770.4 actor_loss=0.1350 critic_loss=124467825868.8000 entropy=3.1194 ent_coef=0.001839 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 17880] reward=-67495382.2 actor_loss=0.1300 critic_loss=133096988672.0000 entropy=3.1196 ent_coef=0.001839 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0990 front_blocked=0
|
|
[Eval 17880] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-390626.0 mean_steps=14.0
|
|
[Episode 17890] reward=-47698343.6 actor_loss=0.1080 critic_loss=122224994304.0000 entropy=3.1192 ent_coef=0.001839 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 17900] reward=-56275408.8 actor_loss=0.1094 critic_loss=129293971456.0000 entropy=3.1184 ent_coef=0.001839 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 17900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566332.9 mean_steps=11.9
|
|
[Episode 17910] reward=-52685661.8 actor_loss=0.1006 critic_loss=122944278528.0000 entropy=3.1157 ent_coef=0.001839 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 17920] reward=-60442816.5 actor_loss=0.1300 critic_loss=127896435712.0000 entropy=3.1146 ent_coef=0.001839 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 17920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-444640.6 mean_steps=12.7
|
|
[Episode 17930] reward=-47062710.9 actor_loss=0.1084 critic_loss=119795963611.4286 entropy=3.1160 ent_coef=0.001839 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 17940] reward=-62286516.7 actor_loss=0.1354 critic_loss=124843080908.8000 entropy=3.1171 ent_coef=0.001839 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Eval 17940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-515173.5 mean_steps=12.4
|
|
[Episode 17950] reward=-60900757.1 actor_loss=0.1199 critic_loss=131268951040.0000 entropy=3.1166 ent_coef=0.001838 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 17960] reward=-43271145.6 actor_loss=0.1121 critic_loss=115997303466.6667 entropy=3.1164 ent_coef=0.001838 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 17960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-408189.8 mean_steps=14.1
|
|
[Episode 17970] reward=-57640025.4 actor_loss=0.1029 critic_loss=129003588812.8000 entropy=3.1143 ent_coef=0.001838 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 17980] reward=-61996320.9 actor_loss=0.1376 critic_loss=130647372595.2000 entropy=3.1170 ent_coef=0.001838 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 17980] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-357182.7 mean_steps=14.7
|
|
[Episode 17990] reward=-44043461.0 actor_loss=0.1390 critic_loss=118690759400.7273 entropy=3.1176 ent_coef=0.001838 approx_kl=-0.0001 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Episode 18000] reward=-66698764.4 actor_loss=0.1383 critic_loss=133724584345.6000 entropy=3.1173 ent_coef=0.001838 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Eval 18000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-515541.1 mean_steps=12.4
|
|
[Episode 18010] reward=-48926644.6 actor_loss=0.1415 critic_loss=120730967244.8000 entropy=3.1139 ent_coef=0.001838 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 18020] reward=-54141630.8 actor_loss=0.1055 critic_loss=125162389504.0000 entropy=3.1151 ent_coef=0.001838 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 18020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467120.3 mean_steps=12.7
|
|
[Episode 18030] reward=-48117603.6 actor_loss=0.1066 critic_loss=119319059309.7143 entropy=3.1162 ent_coef=0.001838 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 18040] reward=-53638076.5 actor_loss=0.1095 critic_loss=129029692529.7778 entropy=3.1152 ent_coef=0.001838 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 18040] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-389539.9 mean_steps=14.0
|
|
[Episode 18050] reward=-47685928.5 actor_loss=0.1001 critic_loss=120265695232.0000 entropy=3.1142 ent_coef=0.001838 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 18060] reward=-48310884.2 actor_loss=0.0932 critic_loss=121881567232.0000 entropy=3.1137 ent_coef=0.001837 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 18060] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-441159.2 mean_steps=13.2
|
|
[Episode 18070] reward=-51200100.9 actor_loss=0.1374 critic_loss=118981150906.1818 entropy=3.1157 ent_coef=0.001837 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 18080] reward=-39552420.9 actor_loss=0.1191 critic_loss=116859785216.0000 entropy=3.1154 ent_coef=0.001837 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0703 front_blocked=0
|
|
[Eval 18080] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-489905.7 mean_steps=13.1
|
|
[Episode 18090] reward=-54797502.7 actor_loss=0.0972 critic_loss=125227550492.4444 entropy=3.1162 ent_coef=0.001837 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 18100] reward=-57527432.7 actor_loss=0.1443 critic_loss=127160614912.0000 entropy=3.1153 ent_coef=0.001837 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 18100] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-402615.0 mean_steps=13.8
|
|
[Episode 18110] reward=-51826379.5 actor_loss=0.1241 critic_loss=118811811020.8000 entropy=3.1135 ent_coef=0.001837 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 18120] reward=-55505424.5 actor_loss=0.1145 critic_loss=125141483520.0000 entropy=3.1118 ent_coef=0.001837 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 18120] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-430538.0 mean_steps=13.5
|
|
[Episode 18130] reward=-43648776.4 actor_loss=0.1059 critic_loss=117613902233.6000 entropy=3.1095 ent_coef=0.001837 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 18140] reward=-53144093.0 actor_loss=0.1148 critic_loss=126483633493.3333 entropy=3.1089 ent_coef=0.001837 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0807 front_blocked=0
|
|
[Eval 18140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-525696.9 mean_steps=13.1
|
|
[Episode 18150] reward=-43370767.7 actor_loss=0.1317 critic_loss=115457242726.4000 entropy=3.1087 ent_coef=0.001837 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 18160] reward=-56801207.7 actor_loss=0.1346 critic_loss=127419852117.3333 entropy=3.1074 ent_coef=0.001837 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 18160] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-396269.8 mean_steps=13.8
|
|
[Episode 18170] reward=-58307397.8 actor_loss=0.1258 critic_loss=124757965824.0000 entropy=3.1059 ent_coef=0.001836 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 18180] reward=-55864168.9 actor_loss=0.1290 critic_loss=127309118122.6667 entropy=3.1070 ent_coef=0.001836 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 18180] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-444395.7 mean_steps=13.5
|
|
[Episode 18190] reward=-53528962.5 actor_loss=0.0740 critic_loss=124317769728.0000 entropy=3.1060 ent_coef=0.001836 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 18200] reward=-57666225.9 actor_loss=0.1160 critic_loss=124542951970.1333 entropy=3.1047 ent_coef=0.001836 approx_kl=0.0010 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Eval 18200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-431197.7 mean_steps=12.4
|
|
[Episode 18210] reward=-51235570.8 actor_loss=0.1002 critic_loss=122769403904.0000 entropy=3.1024 ent_coef=0.001836 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 18220] reward=-60687653.1 actor_loss=0.1824 critic_loss=133166681088.0000 entropy=3.0977 ent_coef=0.001836 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Eval 18220] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-460458.9 mean_steps=12.8
|
|
[Episode 18230] reward=-55017981.8 actor_loss=0.1132 critic_loss=121324314624.0000 entropy=3.0951 ent_coef=0.001836 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 18240] reward=-55970299.7 actor_loss=0.1136 critic_loss=126048382156.8000 entropy=3.0937 ent_coef=0.001836 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 18240] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-460523.5 mean_steps=13.5
|
|
[Episode 18250] reward=-61027292.8 actor_loss=0.1250 critic_loss=129649901568.0000 entropy=3.0903 ent_coef=0.001836 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Episode 18260] reward=-67274752.5 actor_loss=0.1241 critic_loss=133352239104.0000 entropy=3.0893 ent_coef=0.001836 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Eval 18260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442551.6 mean_steps=14.2
|
|
[Episode 18270] reward=-47245884.5 actor_loss=0.0848 critic_loss=120630213290.6667 entropy=3.0906 ent_coef=0.001836 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0723 front_blocked=0
|
|
[Episode 18280] reward=-62876367.2 actor_loss=0.1210 critic_loss=129072614400.0000 entropy=3.0910 ent_coef=0.001835 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 18280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480824.8 mean_steps=13.6
|
|
[Episode 18290] reward=-48024748.0 actor_loss=0.0921 critic_loss=125704346282.6667 entropy=3.0918 ent_coef=0.001835 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0742 front_blocked=0
|
|
[Episode 18300] reward=-52811832.7 actor_loss=0.1447 critic_loss=124979496732.4444 entropy=3.0911 ent_coef=0.001835 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 18300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-391310.8 mean_steps=13.7
|
|
[Episode 18310] reward=-61044673.0 actor_loss=0.1102 critic_loss=128534972825.6000 entropy=3.0913 ent_coef=0.001835 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0918 front_blocked=0
|
|
[Episode 18320] reward=-53475869.6 actor_loss=0.1173 critic_loss=123906499730.2857 entropy=3.0939 ent_coef=0.001835 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 18320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-561773.8 mean_steps=12.7
|
|
[Episode 18330] reward=-53481853.9 actor_loss=0.1409 critic_loss=123298013184.0000 entropy=3.0946 ent_coef=0.001835 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 18340] reward=-57204103.0 actor_loss=0.0967 critic_loss=124277260288.0000 entropy=3.0959 ent_coef=0.001835 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Eval 18340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-414980.8 mean_steps=13.8
|
|
[Episode 18350] reward=-47958306.9 actor_loss=0.1158 critic_loss=120761730048.0000 entropy=3.0921 ent_coef=0.001835 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 18360] reward=-41221506.7 actor_loss=0.0832 critic_loss=114729390080.0000 entropy=3.0923 ent_coef=0.001835 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0716 front_blocked=0
|
|
[Eval 18360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453503.4 mean_steps=13.5
|
|
[Episode 18370] reward=-46857230.0 actor_loss=0.1252 critic_loss=118586132666.1818 entropy=3.0896 ent_coef=0.001835 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 18380] reward=-56998856.2 actor_loss=0.1348 critic_loss=124476927414.8571 entropy=3.0873 ent_coef=0.001835 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 18380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-413951.6 mean_steps=13.3
|
|
[Episode 18390] reward=-60055542.7 actor_loss=0.1379 critic_loss=131051610112.0000 entropy=3.0846 ent_coef=0.001834 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 18400] reward=-54772010.2 actor_loss=0.1356 critic_loss=122177375670.8571 entropy=3.0821 ent_coef=0.001834 approx_kl=0.0010 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 18400] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-518233.2 mean_steps=12.2
|
|
[Episode 18410] reward=-55529758.4 actor_loss=0.1281 critic_loss=123236932887.2727 entropy=3.0825 ent_coef=0.001834 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 18420] reward=-53735915.9 actor_loss=0.1138 critic_loss=120487905280.0000 entropy=3.0809 ent_coef=0.001834 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 18420] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-554873.1 mean_steps=12.8
|
|
[Episode 18430] reward=-59687234.9 actor_loss=0.1387 critic_loss=129675324322.9091 entropy=3.0779 ent_coef=0.001834 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Episode 18440] reward=-55816889.4 actor_loss=0.1463 critic_loss=121392674816.0000 entropy=3.0759 ent_coef=0.001834 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0957 front_blocked=0
|
|
[Eval 18440] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-610276.4 mean_steps=12.0
|
|
[Episode 18450] reward=-42387616.0 actor_loss=0.1316 critic_loss=117416280064.0000 entropy=3.0739 ent_coef=0.001834 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0736 front_blocked=0
|
|
[Episode 18460] reward=-49520937.6 actor_loss=0.1087 critic_loss=121578129700.5714 entropy=3.0747 ent_coef=0.001834 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Eval 18460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-381019.6 mean_steps=13.9
|
|
[Episode 18470] reward=-52312968.7 actor_loss=0.1434 critic_loss=122897901860.5714 entropy=3.0745 ent_coef=0.001834 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Episode 18480] reward=-54540324.4 actor_loss=0.1398 critic_loss=124889073956.5714 entropy=3.0741 ent_coef=0.001834 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 18480] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-554279.0 mean_steps=10.9
|
|
[Episode 18490] reward=-47231064.4 actor_loss=0.1142 critic_loss=122407727104.0000 entropy=3.0746 ent_coef=0.001834 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Episode 18500] reward=-45161503.6 actor_loss=0.0988 critic_loss=118249345024.0000 entropy=3.0732 ent_coef=0.001834 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 18500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-511603.7 mean_steps=13.1
|
|
[Episode 18510] reward=-56037506.9 actor_loss=0.1459 critic_loss=125324653363.2000 entropy=3.0720 ent_coef=0.001833 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Episode 18520] reward=-50084254.0 actor_loss=0.1202 critic_loss=119299915776.0000 entropy=3.0732 ent_coef=0.001833 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 18520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-348688.3 mean_steps=15.3
|
|
[Episode 18530] reward=-49515913.5 actor_loss=0.1208 critic_loss=120516100778.6667 entropy=3.0723 ent_coef=0.001833 approx_kl=0.0012 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 18540] reward=-52432041.5 actor_loss=0.1389 critic_loss=125967502540.8000 entropy=3.0720 ent_coef=0.001833 approx_kl=0.0003 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 18540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-468782.1 mean_steps=13.0
|
|
[Episode 18550] reward=-55541328.6 actor_loss=0.1296 critic_loss=125044273971.2000 entropy=3.0714 ent_coef=0.001833 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Episode 18560] reward=-50454889.4 actor_loss=0.1300 critic_loss=119804773522.2857 entropy=3.0698 ent_coef=0.001833 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 18560] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-627720.6 mean_steps=11.4
|
|
[Episode 18570] reward=-56812289.4 actor_loss=0.1137 critic_loss=126719295488.0000 entropy=3.0674 ent_coef=0.001833 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Episode 18580] reward=-54902464.5 actor_loss=0.1307 critic_loss=126059939612.4444 entropy=3.0678 ent_coef=0.001833 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 18580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-382207.7 mean_steps=13.9
|
|
[Episode 18590] reward=-55062103.5 actor_loss=0.1307 critic_loss=126172179456.0000 entropy=3.0682 ent_coef=0.001833 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0788 front_blocked=0
|
|
[Episode 18600] reward=-47649688.7 actor_loss=0.1119 critic_loss=118615731851.6364 entropy=3.0682 ent_coef=0.001833 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 18600] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-450707.0 mean_steps=12.8
|
|
[Episode 18610] reward=-50363372.3 actor_loss=0.1114 critic_loss=123582357504.0000 entropy=3.0689 ent_coef=0.001833 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 18620] reward=-62392470.8 actor_loss=0.1393 critic_loss=126464428714.6667 entropy=3.0653 ent_coef=0.001832 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 18620] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-474351.4 mean_steps=13.6
|
|
[Episode 18630] reward=-55148338.6 actor_loss=0.1358 critic_loss=124078292992.0000 entropy=3.0633 ent_coef=0.001832 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 18640] reward=-54566490.2 actor_loss=0.1293 critic_loss=123980635340.8000 entropy=3.0600 ent_coef=0.001832 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0931 front_blocked=0
|
|
[Eval 18640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-433434.7 mean_steps=13.1
|
|
[Episode 18650] reward=-43155217.0 actor_loss=0.1076 critic_loss=120260167680.0000 entropy=3.0584 ent_coef=0.001832 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0710 front_blocked=0
|
|
[Episode 18660] reward=-62214750.4 actor_loss=0.1018 critic_loss=132194540544.0000 entropy=3.0560 ent_coef=0.001832 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Eval 18660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-382792.0 mean_steps=13.2
|
|
[Episode 18670] reward=-56602361.3 actor_loss=0.1064 critic_loss=123871963818.6667 entropy=3.0542 ent_coef=0.001832 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 18680] reward=-53357373.3 actor_loss=0.1153 critic_loss=122556210176.0000 entropy=3.0532 ent_coef=0.001832 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0749 front_blocked=0
|
|
[Eval 18680] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544268.9 mean_steps=12.3
|
|
[Episode 18690] reward=-63684261.6 actor_loss=0.1796 critic_loss=134415398229.3333 entropy=3.0521 ent_coef=0.001832 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0983 front_blocked=0
|
|
[Episode 18700] reward=-63157934.3 actor_loss=0.1309 critic_loss=130859415779.5556 entropy=3.0516 ent_coef=0.001832 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 18700] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-594864.3 mean_steps=11.2
|
|
[Episode 18710] reward=-63781660.8 actor_loss=0.1198 critic_loss=133435249012.3636 entropy=3.0521 ent_coef=0.001832 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0879 front_blocked=0
|
|
[Episode 18720] reward=-70361449.7 actor_loss=0.1449 critic_loss=131816121685.3333 entropy=3.0515 ent_coef=0.001832 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0983 front_blocked=0
|
|
[Eval 18720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-428041.9 mean_steps=12.3
|
|
[Episode 18730] reward=-56839378.0 actor_loss=0.1194 critic_loss=126040879476.3636 entropy=3.0543 ent_coef=0.001831 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Episode 18740] reward=-47588701.7 actor_loss=0.0971 critic_loss=117534048256.0000 entropy=3.0534 ent_coef=0.001831 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 18740] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-597957.8 mean_steps=11.6
|
|
[Episode 18750] reward=-43932496.8 actor_loss=0.1177 critic_loss=118015705998.2222 entropy=3.0556 ent_coef=0.001831 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0762 front_blocked=0
|
|
[Episode 18760] reward=-56034158.8 actor_loss=0.1135 critic_loss=130155747737.6000 entropy=3.0552 ent_coef=0.001831 approx_kl=0.0083 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 18760] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530565.0 mean_steps=12.4
|
|
[Episode 18770] reward=-53467077.9 actor_loss=0.0954 critic_loss=117775646720.0000 entropy=3.0556 ent_coef=0.001831 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 18780] reward=-60421081.0 actor_loss=0.0984 critic_loss=130422516394.6667 entropy=3.0556 ent_coef=0.001831 approx_kl=-0.0002 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Eval 18780] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-607777.7 mean_steps=12.0
|
|
[Episode 18790] reward=-52583638.5 actor_loss=0.1245 critic_loss=124911973717.3333 entropy=3.0543 ent_coef=0.001831 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0814 front_blocked=0
|
|
[Episode 18800] reward=-59981909.3 actor_loss=0.1122 critic_loss=129812325990.4000 entropy=3.0531 ent_coef=0.001831 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 18800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475095.0 mean_steps=13.4
|
|
[Episode 18810] reward=-46604544.9 actor_loss=0.1256 critic_loss=119590240256.0000 entropy=3.0508 ent_coef=0.001831 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 18820] reward=-53661810.2 actor_loss=0.1326 critic_loss=125592010752.0000 entropy=3.0534 ent_coef=0.001831 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0794 front_blocked=0
|
|
[Eval 18820] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586217.1 mean_steps=11.7
|
|
[Episode 18830] reward=-46253819.3 actor_loss=0.1154 critic_loss=120118669312.0000 entropy=3.0547 ent_coef=0.001831 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 18840] reward=-58388937.6 actor_loss=0.1388 critic_loss=126060951552.0000 entropy=3.0535 ent_coef=0.001830 approx_kl=0.0008 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 18840] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-606317.5 mean_steps=11.7
|
|
[Episode 18850] reward=-60460847.7 actor_loss=0.1210 critic_loss=127728355328.0000 entropy=3.0512 ent_coef=0.001830 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0892 front_blocked=0
|
|
[Episode 18860] reward=-45365146.0 actor_loss=0.1149 critic_loss=117249430869.3333 entropy=3.0521 ent_coef=0.001830 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 18860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492292.4 mean_steps=12.7
|
|
[Episode 18870] reward=-53037875.4 actor_loss=0.1238 critic_loss=125711983957.3333 entropy=3.0521 ent_coef=0.001830 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 18880] reward=-55710791.7 actor_loss=0.1339 critic_loss=124751508275.2000 entropy=3.0518 ent_coef=0.001830 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0924 front_blocked=0
|
|
[Eval 18880] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-601796.4 mean_steps=12.1
|
|
[Episode 18890] reward=-58458871.9 actor_loss=0.1400 critic_loss=125716203520.0000 entropy=3.0500 ent_coef=0.001830 approx_kl=0.0000 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Episode 18900] reward=-60401319.2 actor_loss=0.0921 critic_loss=128047620096.0000 entropy=3.0503 ent_coef=0.001830 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 18900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-414222.3 mean_steps=14.1
|
|
[Episode 18910] reward=-53072883.2 actor_loss=0.1261 critic_loss=118495897190.4000 entropy=3.0499 ent_coef=0.001830 approx_kl=0.0005 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Episode 18920] reward=-59495488.7 actor_loss=0.1263 critic_loss=126304896146.2857 entropy=3.0476 ent_coef=0.001830 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Eval 18920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536462.0 mean_steps=12.4
|
|
[Episode 18930] reward=-47909571.0 actor_loss=0.1138 critic_loss=120426490538.6667 entropy=3.0448 ent_coef=0.001830 approx_kl=0.0001 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 18940] reward=-59129252.4 actor_loss=0.1385 critic_loss=126861109930.6667 entropy=3.0421 ent_coef=0.001830 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0977 front_blocked=0
|
|
[Eval 18940] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-451074.5 mean_steps=13.4
|
|
[Episode 18950] reward=-64935583.4 actor_loss=0.1454 critic_loss=131549795123.2000 entropy=3.0426 ent_coef=0.001829 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 18960] reward=-49674445.1 actor_loss=0.1008 critic_loss=119226374144.0000 entropy=3.0410 ent_coef=0.001829 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 18960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-336913.4 mean_steps=13.8
|
|
[Episode 18970] reward=-44568466.4 actor_loss=0.0992 critic_loss=118300632064.0000 entropy=3.0372 ent_coef=0.001829 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0697 front_blocked=0
|
|
[Episode 18980] reward=-69936817.3 actor_loss=0.1685 critic_loss=137552294343.1111 entropy=3.0369 ent_coef=0.001829 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Eval 18980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-471239.7 mean_steps=12.7
|
|
[Episode 18990] reward=-59994420.5 actor_loss=0.0910 critic_loss=125454866432.0000 entropy=3.0360 ent_coef=0.001829 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 19000] reward=-50996660.0 actor_loss=0.1138 critic_loss=121886685866.6667 entropy=3.0356 ent_coef=0.001829 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 19000] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490999.7 mean_steps=13.1
|
|
[Episode 19010] reward=-53776772.8 actor_loss=0.1178 critic_loss=125385097216.0000 entropy=3.0367 ent_coef=0.001829 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Episode 19020] reward=-61888549.8 actor_loss=0.1064 critic_loss=127182756522.6667 entropy=3.0346 ent_coef=0.001829 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 19020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523233.8 mean_steps=13.2
|
|
[Episode 19030] reward=-65587932.9 actor_loss=0.1100 critic_loss=131867164672.0000 entropy=3.0353 ent_coef=0.001829 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0872 front_blocked=0
|
|
[Episode 19040] reward=-55610060.8 actor_loss=0.1096 critic_loss=127015807906.9091 entropy=3.0352 ent_coef=0.001829 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0840 front_blocked=0
|
|
[Eval 19040] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-621897.8 mean_steps=10.7
|
|
[Episode 19050] reward=-60343284.2 actor_loss=0.1536 critic_loss=124526926701.7143 entropy=3.0306 ent_coef=0.001829 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0938 front_blocked=0
|
|
[Episode 19060] reward=-50265279.0 actor_loss=0.1025 critic_loss=121572153902.5455 entropy=3.0285 ent_coef=0.001828 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Eval 19060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-451986.8 mean_steps=12.6
|
|
[Episode 19070] reward=-51231813.3 actor_loss=0.1041 critic_loss=120754135040.0000 entropy=3.0289 ent_coef=0.001828 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0846 front_blocked=0
|
|
[Episode 19080] reward=-44378016.7 actor_loss=0.1007 critic_loss=117625648859.4286 entropy=3.0288 ent_coef=0.001828 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0671 front_blocked=0
|
|
[Eval 19080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-562473.3 mean_steps=11.9
|
|
[Episode 19090] reward=-63475465.8 actor_loss=0.1238 critic_loss=128520564736.0000 entropy=3.0292 ent_coef=0.001828 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 19100] reward=-56872144.0 actor_loss=0.1141 critic_loss=121783756800.0000 entropy=3.0290 ent_coef=0.001828 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Eval 19100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553298.0 mean_steps=12.6
|
|
[Episode 19110] reward=-54023267.5 actor_loss=0.1277 critic_loss=124146597888.0000 entropy=3.0284 ent_coef=0.001828 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 19120] reward=-56279545.3 actor_loss=0.1241 critic_loss=128180568064.0000 entropy=3.0272 ent_coef=0.001828 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0866 front_blocked=0
|
|
[Eval 19120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-490252.6 mean_steps=12.7
|
|
[Episode 19130] reward=-45608077.1 actor_loss=0.1048 critic_loss=115706267420.4444 entropy=3.0261 ent_coef=0.001828 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0781 front_blocked=0
|
|
[Episode 19140] reward=-47286054.1 actor_loss=0.1301 critic_loss=119433237504.0000 entropy=3.0265 ent_coef=0.001828 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0768 front_blocked=0
|
|
[Eval 19140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-502771.5 mean_steps=13.2
|
|
[Episode 19150] reward=-61510857.9 actor_loss=0.1516 critic_loss=123893860644.5714 entropy=3.0258 ent_coef=0.001828 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0977 front_blocked=0
|
|
[Episode 19160] reward=-50635898.4 actor_loss=0.0976 critic_loss=121615713621.3333 entropy=3.0241 ent_coef=0.001828 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Eval 19160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-501698.7 mean_steps=12.8
|
|
[Episode 19170] reward=-51849956.9 actor_loss=0.1469 critic_loss=123260786395.4286 entropy=3.0209 ent_coef=0.001827 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0859 front_blocked=0
|
|
[Episode 19180] reward=-57045644.4 actor_loss=0.1024 critic_loss=123515747441.7778 entropy=3.0169 ent_coef=0.001827 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0833 front_blocked=0
|
|
[Eval 19180] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-513722.2 mean_steps=11.2
|
|
[Episode 19190] reward=-54738580.0 actor_loss=0.1266 critic_loss=127188335274.6667 entropy=3.0172 ent_coef=0.001827 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0775 front_blocked=0
|
|
[Episode 19200] reward=-44939647.6 actor_loss=0.0760 critic_loss=121319306581.3333 entropy=3.0164 ent_coef=0.001827 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0664 front_blocked=0
|
|
[Eval 19200] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-382069.1 mean_steps=13.6
|
|
[Episode 19210] reward=-58686971.0 actor_loss=0.1361 critic_loss=128092461738.6667 entropy=3.0149 ent_coef=0.001827 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Episode 19220] reward=-51215187.9 actor_loss=0.1250 critic_loss=121823889408.0000 entropy=3.0157 ent_coef=0.001827 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Eval 19220] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-530011.4 mean_steps=12.6
|
|
[Episode 19230] reward=-48779827.9 actor_loss=0.0947 critic_loss=125273636864.0000 entropy=3.0130 ent_coef=0.001827 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0729 front_blocked=0
|
|
[Episode 19240] reward=-54574326.4 actor_loss=0.1059 critic_loss=124403458048.0000 entropy=3.0133 ent_coef=0.001827 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0827 front_blocked=0
|
|
[Eval 19240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-347063.0 mean_steps=14.3
|
|
[Episode 19250] reward=-56326563.9 actor_loss=0.1282 critic_loss=127681761865.1429 entropy=3.0121 ent_coef=0.001827 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|
|
[Episode 19260] reward=-56259172.9 actor_loss=0.1536 critic_loss=122739435292.4444 entropy=3.0145 ent_coef=0.001827 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0885 front_blocked=0
|
|
[Eval 19260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-450209.1 mean_steps=12.6
|
|
[Episode 19270] reward=-49047836.7 actor_loss=0.1039 critic_loss=122932958003.2000 entropy=3.0121 ent_coef=0.001827 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Episode 19280] reward=-62678487.4 actor_loss=0.1225 critic_loss=130579468288.0000 entropy=3.0117 ent_coef=0.001826 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Eval 19280] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-451842.8 mean_steps=13.9
|
|
[Episode 19290] reward=-53871482.1 actor_loss=0.1083 critic_loss=125019496448.0000 entropy=3.0109 ent_coef=0.001826 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0898 front_blocked=0
|
|
[Episode 19300] reward=-60406170.5 actor_loss=0.1408 critic_loss=126152185856.0000 entropy=3.0118 ent_coef=0.001826 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0911 front_blocked=0
|
|
[Eval 19300] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559298.8 mean_steps=11.7
|
|
[Episode 19310] reward=-59614693.1 actor_loss=0.1118 critic_loss=128815809682.2857 entropy=3.0110 ent_coef=0.001826 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0820 front_blocked=0
|
|
[Episode 19320] reward=-46183227.8 actor_loss=0.0988 critic_loss=119061043882.6667 entropy=3.0127 ent_coef=0.001826 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0755 front_blocked=0
|
|
[Eval 19320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-549841.4 mean_steps=11.6
|
|
[Episode 19330] reward=-60239250.5 actor_loss=0.1519 critic_loss=122565778090.6667 entropy=3.0178 ent_coef=0.001826 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0983 front_blocked=0
|
|
[Episode 19340] reward=-58535784.8 actor_loss=0.1732 critic_loss=122514087936.0000 entropy=3.0177 ent_coef=0.001826 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0905 front_blocked=0
|
|
[Eval 19340] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-381944.2 mean_steps=15.1
|
|
[Episode 19350] reward=-58509516.8 actor_loss=0.1478 critic_loss=124441037482.6667 entropy=3.0140 ent_coef=0.001826 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0944 front_blocked=0
|
|
[Episode 19360] reward=-49111516.1 actor_loss=0.1042 critic_loss=121769389260.8000 entropy=3.0150 ent_coef=0.001826 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0801 front_blocked=0
|
|
[Eval 19360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-583306.9 mean_steps=12.1
|
|
[Episode 19370] reward=-59863910.5 actor_loss=0.1204 critic_loss=125608325705.1429 entropy=3.0155 ent_coef=0.001826 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0853 front_blocked=0
|