SR-ARPOD/Logs/nohup_tuned_20260331_172054.out
2026-04-01 22:48:53 +08:00

752 lines
103 KiB
Plaintext

nohup: ignoring input
[Episode 10] reward=-63851441.9 actor_loss=0.3133 critic_loss=129841111367.6800 entropy=4.2570 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0977 front_blocked=0
[Episode 20] reward=-52909280.9 actor_loss=0.2052 critic_loss=125718149044.1481 entropy=4.2537 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Eval 20] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-659529.7 mean_steps=11.0
[Episode 30] reward=-63338958.3 actor_loss=0.2023 critic_loss=128646346662.9565 entropy=4.2573 approx_kl=0.0015 kl_stop=1 intervention_rate=0.1022 front_blocked=0
[Episode 40] reward=-49172257.0 actor_loss=0.1754 critic_loss=123016774087.1111 entropy=4.2530 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Eval 40] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517207.1 mean_steps=13.5
[Episode 50] reward=-51149792.5 actor_loss=0.1935 critic_loss=123220183941.1200 entropy=4.2410 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Episode 60] reward=-55862936.2 actor_loss=0.1812 critic_loss=126646251671.7037 entropy=4.2325 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0905 front_blocked=0
[Eval 60] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-621582.4 mean_steps=11.7
[Episode 70] reward=-54642117.1 actor_loss=0.1261 critic_loss=128862159140.5714 entropy=4.2207 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0768 front_blocked=0
[Episode 80] reward=-53812567.5 actor_loss=0.1524 critic_loss=127352919654.4000 entropy=4.2125 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0872 front_blocked=0
[Eval 80] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-554723.5 mean_steps=13.3
[Episode 90] reward=-53788404.0 actor_loss=0.1607 critic_loss=122581215704.6154 entropy=4.2021 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0859 front_blocked=0
[Episode 100] reward=-58403964.7 actor_loss=0.1214 critic_loss=131865793629.0909 entropy=4.1949 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0801 front_blocked=0
[Eval 100] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-599500.5 mean_steps=13.1
[Episode 110] reward=-56019319.8 actor_loss=0.1087 critic_loss=129121676051.6923 entropy=4.1890 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0768 front_blocked=0
[Episode 120] reward=-48692842.7 actor_loss=0.1289 critic_loss=126438932080.3902 entropy=4.1896 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Eval 120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-511487.2 mean_steps=12.7
[Episode 130] reward=-47417266.9 actor_loss=0.0855 critic_loss=121657130651.8261 entropy=4.1826 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0723 front_blocked=0
[Episode 140] reward=-46462479.3 actor_loss=0.0896 critic_loss=117564085979.4286 entropy=4.1722 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0749 front_blocked=0
[Eval 140] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-577031.6 mean_steps=13.8
[Episode 150] reward=-48241977.8 actor_loss=0.1061 critic_loss=120333679379.6923 entropy=4.1693 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Episode 160] reward=-50526362.3 actor_loss=0.1298 critic_loss=126040323868.4444 entropy=4.1651 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0768 front_blocked=0
[Eval 160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469199.3 mean_steps=13.2
[Episode 170] reward=-50420712.4 actor_loss=0.0964 critic_loss=123934441935.6981 entropy=4.1694 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0781 front_blocked=0
[Episode 180] reward=-44053883.7 actor_loss=0.1051 critic_loss=120063532509.8667 entropy=4.1668 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0729 front_blocked=0
[Eval 180] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467701.1 mean_steps=13.3
[Episode 190] reward=-58448365.4 actor_loss=0.1412 critic_loss=132708793548.8000 entropy=4.1628 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0905 front_blocked=0
[Episode 200] reward=-46027856.4 actor_loss=0.1015 critic_loss=122710308864.0000 entropy=4.1636 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0671 front_blocked=0
[Eval 200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463335.4 mean_steps=13.7
[Episode 210] reward=-54613064.5 actor_loss=0.1142 critic_loss=127444807307.6364 entropy=4.1629 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Episode 220] reward=-58815779.4 actor_loss=0.1784 critic_loss=127625721173.3333 entropy=4.1672 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0911 front_blocked=0
[Eval 220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-468694.7 mean_steps=14.2
[Episode 230] reward=-52522743.0 actor_loss=0.1240 critic_loss=123258433908.3636 entropy=4.1641 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0781 front_blocked=0
[Episode 240] reward=-60758151.0 actor_loss=0.1185 critic_loss=130995658069.3333 entropy=4.1625 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0859 front_blocked=0
[Eval 240] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-370537.8 mean_steps=14.9
[Episode 250] reward=-41300205.7 actor_loss=0.1243 critic_loss=121365454301.8667 entropy=4.1606 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0729 front_blocked=0
[Episode 260] reward=-54895451.5 actor_loss=0.1254 critic_loss=125863833108.4800 entropy=4.1538 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0749 front_blocked=0
[Eval 260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-416967.0 mean_steps=14.4
[Episode 270] reward=-37540646.6 actor_loss=0.1371 critic_loss=117602611200.0000 entropy=4.1546 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0716 front_blocked=0
[Episode 280] reward=-45923629.7 actor_loss=0.1467 critic_loss=120025750987.0345 entropy=4.1441 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0801 front_blocked=0
[Eval 280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-463645.7 mean_steps=13.2
[Episode 290] reward=-54215966.1 actor_loss=0.1509 critic_loss=132258488320.0000 entropy=4.1447 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0898 front_blocked=0
[Episode 300] reward=-47236863.5 actor_loss=0.1140 critic_loss=122301659623.6190 entropy=4.1377 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0755 front_blocked=0
[Eval 300] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-412114.9 mean_steps=14.2
[Episode 310] reward=-49163663.8 actor_loss=0.1295 critic_loss=127583468499.4783 entropy=4.1266 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Episode 320] reward=-51529689.0 actor_loss=0.1167 critic_loss=122908692795.0769 entropy=4.1265 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Eval 320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-550024.8 mean_steps=12.9
[Episode 330] reward=-53977917.5 actor_loss=0.1495 critic_loss=126271170439.5294 entropy=4.1323 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Episode 340] reward=-57034051.2 actor_loss=0.1362 critic_loss=124059248867.5556 entropy=4.1235 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0879 front_blocked=0
[Eval 340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-448276.2 mean_steps=14.3
[Episode 350] reward=-63361947.5 actor_loss=0.1509 critic_loss=133663209795.3684 entropy=4.1227 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0931 front_blocked=0
[Episode 360] reward=-58609772.5 actor_loss=0.1188 critic_loss=130560599654.4000 entropy=4.1271 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0872 front_blocked=0
[Eval 360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-453929.1 mean_steps=13.6
[Episode 370] reward=-54098354.9 actor_loss=0.1224 critic_loss=125770548224.0000 entropy=4.1225 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0775 front_blocked=0
[Episode 380] reward=-39235736.8 actor_loss=0.0969 critic_loss=121582762211.5556 entropy=4.1162 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0677 front_blocked=0
[Eval 380] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-285368.5 mean_steps=14.9
[Episode 390] reward=-53908908.1 actor_loss=0.1229 critic_loss=125522839913.4118 entropy=4.1127 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Episode 400] reward=-51017454.2 actor_loss=0.1078 critic_loss=126417592848.5161 entropy=4.1135 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0749 front_blocked=0
[Eval 400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-419724.7 mean_steps=13.4
[Episode 410] reward=-54409983.7 actor_loss=0.1182 critic_loss=125454242909.0909 entropy=4.1135 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0781 front_blocked=0
[Episode 420] reward=-60462489.2 actor_loss=0.1509 critic_loss=133721099342.7692 entropy=4.1122 approx_kl=0.0019 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Eval 420] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-660049.6 mean_steps=10.8
[Episode 430] reward=-41776055.6 actor_loss=0.1165 critic_loss=119177563704.8889 entropy=4.1072 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0710 front_blocked=0
[Episode 440] reward=-42006451.1 actor_loss=0.0992 critic_loss=117518152655.2381 entropy=4.1032 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0658 front_blocked=0
[Eval 440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-519690.6 mean_steps=13.3
[Episode 450] reward=-50777877.1 actor_loss=0.0995 critic_loss=124211693037.0370 entropy=4.1056 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0723 front_blocked=0
[Episode 460] reward=-50951941.1 actor_loss=0.1666 critic_loss=129603725019.4286 entropy=4.1039 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Eval 460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-576004.5 mean_steps=12.8
[Episode 470] reward=-45526568.1 actor_loss=0.1052 critic_loss=120999535852.3077 entropy=4.1043 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0781 front_blocked=0
[Episode 480] reward=-49230525.7 actor_loss=0.0901 critic_loss=125805435289.6000 entropy=4.1048 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0658 front_blocked=0
[Eval 480] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-415811.0 mean_steps=13.4
[Episode 490] reward=-56531919.5 actor_loss=0.1436 critic_loss=128889172423.1111 entropy=4.1008 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Episode 500] reward=-65162147.6 actor_loss=0.1214 critic_loss=136966407437.4737 entropy=4.1001 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0833 front_blocked=0
[Eval 500] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-368591.5 mean_steps=15.8
[Episode 510] reward=-54030691.3 actor_loss=0.0869 critic_loss=132104318702.9333 entropy=4.1034 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0801 front_blocked=0
[Episode 520] reward=-48080088.3 actor_loss=0.1495 critic_loss=123766300672.0000 entropy=4.1048 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Eval 520] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-356875.7 mean_steps=15.1
[Episode 530] reward=-52754892.1 actor_loss=0.1157 critic_loss=127646022314.6667 entropy=4.1166 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Episode 540] reward=-50062530.4 actor_loss=0.0988 critic_loss=123033712640.0000 entropy=4.1157 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0827 front_blocked=0
[Eval 540] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-303281.6 mean_steps=16.1
[Episode 550] reward=-44886629.1 actor_loss=0.0948 critic_loss=125371137675.6364 entropy=4.1178 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0716 front_blocked=0
[Episode 560] reward=-40329286.6 actor_loss=0.1056 critic_loss=119508542805.3333 entropy=4.1157 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0677 front_blocked=0
[Eval 560] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-431127.3 mean_steps=14.4
[Episode 570] reward=-69341343.3 actor_loss=0.1539 critic_loss=136109922167.4667 entropy=4.1138 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0990 front_blocked=0
[Episode 580] reward=-57042616.9 actor_loss=0.1455 critic_loss=130603050715.4286 entropy=4.1134 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0866 front_blocked=0
[Eval 580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-383689.1 mean_steps=14.1
[Episode 590] reward=-54334606.6 actor_loss=0.0851 critic_loss=126203502592.0000 entropy=4.1083 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Episode 600] reward=-41131009.3 actor_loss=0.0967 critic_loss=119859347456.0000 entropy=4.1073 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0690 front_blocked=0
[Eval 600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436221.3 mean_steps=14.4
[Episode 610] reward=-58494083.4 actor_loss=0.1558 critic_loss=129039225651.2000 entropy=4.1041 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0964 front_blocked=0
[Episode 620] reward=-56102412.6 actor_loss=0.1243 critic_loss=127130192233.4118 entropy=4.1042 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Eval 620] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-504245.1 mean_steps=13.2
[Episode 630] reward=-54504577.5 actor_loss=0.1055 critic_loss=127244722176.0000 entropy=4.1051 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0872 front_blocked=0
[Episode 640] reward=-42215395.6 actor_loss=0.1167 critic_loss=118012494409.1429 entropy=4.1085 approx_kl=0.0020 kl_stop=1 intervention_rate=0.0736 front_blocked=0
[Eval 640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-405309.3 mean_steps=14.4
[Episode 650] reward=-49450866.5 actor_loss=0.1295 critic_loss=122435562442.1053 entropy=4.1035 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Episode 660] reward=-48134936.1 actor_loss=0.1223 critic_loss=120629730896.8421 entropy=4.0957 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Eval 660] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-512910.7 mean_steps=13.5
[Episode 670] reward=-56494308.3 actor_loss=0.1259 critic_loss=126244014372.5714 entropy=4.0959 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0859 front_blocked=0
[Episode 680] reward=-44625826.1 actor_loss=0.1035 critic_loss=120915732322.4615 entropy=4.0928 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Eval 680] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-415923.4 mean_steps=15.1
[Episode 690] reward=-53608168.5 actor_loss=0.1345 critic_loss=127067420829.5385 entropy=4.0952 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Episode 700] reward=-51198448.9 actor_loss=0.1349 critic_loss=127172109458.2857 entropy=4.1014 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Eval 700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536968.1 mean_steps=12.2
[Episode 710] reward=-49047795.0 actor_loss=0.1230 critic_loss=124410231107.3684 entropy=4.1039 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Episode 720] reward=-43197509.9 actor_loss=0.0801 critic_loss=123264521947.4286 entropy=4.0999 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0716 front_blocked=0
[Eval 720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452868.9 mean_steps=14.5
[Episode 730] reward=-54610830.5 actor_loss=0.1274 critic_loss=122859968632.4706 entropy=4.0967 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0833 front_blocked=0
[Episode 740] reward=-55982670.8 actor_loss=0.1134 critic_loss=132333847272.7273 entropy=4.0998 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Eval 740] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-527606.4 mean_steps=12.0
[Episode 750] reward=-39677112.9 actor_loss=0.1365 critic_loss=119851494968.8889 entropy=4.0938 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0794 front_blocked=0
[Episode 760] reward=-65618021.1 actor_loss=0.1208 critic_loss=133777637376.0000 entropy=4.0909 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0872 front_blocked=0
[Eval 760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-536502.4 mean_steps=14.3
[Episode 770] reward=-61869451.4 actor_loss=0.1033 critic_loss=132674900340.3636 entropy=4.0929 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Episode 780] reward=-60111162.9 actor_loss=0.1266 critic_loss=129910136201.8462 entropy=4.0879 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Eval 780] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-455155.6 mean_steps=13.7
[Episode 790] reward=-52744003.2 actor_loss=0.0951 critic_loss=125771198779.0769 entropy=4.0903 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0762 front_blocked=0
[Episode 800] reward=-49079541.0 actor_loss=0.1213 critic_loss=121356079104.0000 entropy=4.0944 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Eval 800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505746.0 mean_steps=13.4
[Episode 810] reward=-53364194.6 actor_loss=0.1324 critic_loss=127553533479.3846 entropy=4.0922 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Episode 820] reward=-50702617.2 actor_loss=0.1020 critic_loss=122166909440.0000 entropy=4.0897 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0755 front_blocked=0
[Eval 820] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-587263.8 mean_steps=12.8
[Episode 830] reward=-52005120.9 actor_loss=0.0896 critic_loss=127517688627.2000 entropy=4.0884 approx_kl=0.0016 kl_stop=1 intervention_rate=0.0768 front_blocked=0
[Episode 840] reward=-53689758.6 actor_loss=0.1186 critic_loss=128474061755.7333 entropy=4.0832 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0866 front_blocked=0
[Eval 840] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-459469.2 mean_steps=14.4
[Episode 850] reward=-49083547.8 actor_loss=0.1096 critic_loss=123592378270.4762 entropy=4.0870 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0781 front_blocked=0
[Episode 860] reward=-50967944.0 actor_loss=0.1122 critic_loss=125385562794.6667 entropy=4.0887 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Eval 860] success_rate=0.700 qp_infeasible_rate=0.300 mean_return=-203741.2 mean_steps=16.9
[Episode 870] reward=-60549388.4 actor_loss=0.1276 critic_loss=132672780580.5714 entropy=4.0870 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0872 front_blocked=0
[Episode 880] reward=-48152964.2 actor_loss=0.1392 critic_loss=124041073095.1111 entropy=4.0872 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Eval 880] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-375803.8 mean_steps=15.9
[Episode 890] reward=-58604030.6 actor_loss=0.1156 critic_loss=131729989150.1176 entropy=4.0876 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0775 front_blocked=0
[Episode 900] reward=-56695368.2 actor_loss=0.1354 critic_loss=130646169757.5385 entropy=4.0905 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Eval 900] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536173.4 mean_steps=12.8
[Episode 910] reward=-59747086.7 actor_loss=0.1448 critic_loss=129020513484.8000 entropy=4.0946 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0892 front_blocked=0
[Episode 920] reward=-49355171.2 actor_loss=0.1300 critic_loss=123338422874.3529 entropy=4.0971 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0775 front_blocked=0
[Eval 920] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-466952.2 mean_steps=14.1
[Episode 930] reward=-59074663.4 actor_loss=0.0926 critic_loss=128487752499.2000 entropy=4.0958 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0755 front_blocked=0
[Episode 940] reward=-49854005.1 actor_loss=0.1162 critic_loss=123004204373.3333 entropy=4.0931 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0775 front_blocked=0
[Eval 940] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-472680.9 mean_steps=14.9
[Episode 950] reward=-58097633.2 actor_loss=0.1107 critic_loss=131217610069.3333 entropy=4.0974 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Episode 960] reward=-42194792.4 actor_loss=0.1191 critic_loss=119454770062.2222 entropy=4.1001 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0710 front_blocked=0
[Eval 960] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-283855.6 mean_steps=15.2
[Episode 970] reward=-57445406.0 actor_loss=0.1434 critic_loss=128927187763.2000 entropy=4.1010 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0872 front_blocked=0
[Episode 980] reward=-46348430.5 actor_loss=0.0966 critic_loss=123578097033.8462 entropy=4.0985 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Eval 980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-509783.2 mean_steps=13.3
[Episode 990] reward=-56651995.4 actor_loss=0.1335 critic_loss=127203914673.2308 entropy=4.0985 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0911 front_blocked=0
[Episode 1000] reward=-48859814.4 actor_loss=0.0897 critic_loss=124517843763.2000 entropy=4.1015 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0755 front_blocked=0
[Eval 1000] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-489841.2 mean_steps=12.2
[Episode 1010] reward=-42434764.8 actor_loss=0.0997 critic_loss=120569988710.4000 entropy=4.1006 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Episode 1020] reward=-50498497.2 actor_loss=0.1188 critic_loss=127184282965.3333 entropy=4.1012 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0794 front_blocked=0
[Eval 1020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-460451.0 mean_steps=14.8
[Episode 1030] reward=-45974704.6 actor_loss=0.0942 critic_loss=122673079637.3333 entropy=4.1032 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0723 front_blocked=0
[Episode 1040] reward=-52361004.0 actor_loss=0.1152 critic_loss=126395458706.2857 entropy=4.1066 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0827 front_blocked=0
[Eval 1040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505279.7 mean_steps=13.3
[Episode 1050] reward=-49529670.8 actor_loss=0.1259 critic_loss=125815912261.8182 entropy=4.1101 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0781 front_blocked=0
[Episode 1060] reward=-51732299.7 actor_loss=0.1047 critic_loss=124897886208.0000 entropy=4.1117 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0749 front_blocked=0
[Eval 1060] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-513166.7 mean_steps=12.7
[Episode 1070] reward=-56892973.4 actor_loss=0.1442 critic_loss=129418401382.4000 entropy=4.1105 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0827 front_blocked=0
[Episode 1080] reward=-50898271.7 actor_loss=0.0934 critic_loss=122369507328.0000 entropy=4.1095 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0827 front_blocked=0
[Eval 1080] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-385494.8 mean_steps=14.1
[Episode 1090] reward=-59122632.5 actor_loss=0.1073 critic_loss=132310355968.0000 entropy=4.1122 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Episode 1100] reward=-63740220.7 actor_loss=0.1190 critic_loss=128042275986.2857 entropy=4.1089 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0951 front_blocked=0
[Eval 1100] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-271627.0 mean_steps=15.4
[Episode 1110] reward=-52791967.7 actor_loss=0.1274 critic_loss=125026306234.1818 entropy=4.1090 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0833 front_blocked=0
[Episode 1120] reward=-67588993.9 actor_loss=0.0876 critic_loss=135222414987.6364 entropy=4.1057 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0898 front_blocked=0
[Eval 1120] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-531677.4 mean_steps=12.9
[Episode 1130] reward=-58425643.1 actor_loss=0.1024 critic_loss=128318795320.8889 entropy=4.1078 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Episode 1140] reward=-45345114.5 actor_loss=0.0824 critic_loss=122748301116.9524 entropy=4.1028 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0762 front_blocked=0
[Eval 1140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-387045.3 mean_steps=14.9
[Episode 1150] reward=-60702630.4 actor_loss=0.1076 critic_loss=132797119283.2000 entropy=4.1043 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Episode 1160] reward=-56557080.9 actor_loss=0.1406 critic_loss=128089452953.6000 entropy=4.1100 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0872 front_blocked=0
[Eval 1160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-463472.3 mean_steps=12.9
[Episode 1170] reward=-58508303.4 actor_loss=0.1160 critic_loss=131780499366.9565 entropy=4.1085 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Episode 1180] reward=-52424411.7 actor_loss=0.1067 critic_loss=127346158445.7143 entropy=4.1094 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Eval 1180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569273.7 mean_steps=12.7
[Episode 1190] reward=-46301180.8 actor_loss=0.1004 critic_loss=121172263343.1579 entropy=4.1098 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Episode 1200] reward=-51072707.7 actor_loss=0.1045 critic_loss=126137071802.1818 entropy=4.1144 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Eval 1200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-449772.3 mean_steps=12.8
[Episode 1210] reward=-61651012.4 actor_loss=0.1291 critic_loss=126331284386.9091 entropy=4.1150 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Episode 1220] reward=-51621838.8 actor_loss=0.1257 critic_loss=124405199257.6000 entropy=4.1091 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Eval 1220] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-579817.4 mean_steps=11.9
[Episode 1230] reward=-43700788.4 actor_loss=0.0811 critic_loss=121764090801.2308 entropy=4.1097 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0742 front_blocked=0
[Episode 1240] reward=-54221079.3 actor_loss=0.1146 critic_loss=127472325632.0000 entropy=4.1110 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0794 front_blocked=0
[Eval 1240] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575391.3 mean_steps=12.0
[Episode 1250] reward=-53990318.2 actor_loss=0.1099 critic_loss=126670454411.6364 entropy=4.1142 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0781 front_blocked=0
[Episode 1260] reward=-53860933.2 actor_loss=0.1326 critic_loss=128708149794.1333 entropy=4.1076 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Eval 1260] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-641876.5 mean_steps=11.8
[Episode 1270] reward=-45713124.5 actor_loss=0.1147 critic_loss=119941431296.0000 entropy=4.1076 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Episode 1280] reward=-52994491.0 actor_loss=0.1198 critic_loss=126722274645.3333 entropy=4.1094 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0872 front_blocked=0
[Eval 1280] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499894.3 mean_steps=13.3
[Episode 1290] reward=-57858524.2 actor_loss=0.1312 critic_loss=127134611456.0000 entropy=4.1086 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0859 front_blocked=0
[Episode 1300] reward=-65307139.5 actor_loss=0.1205 critic_loss=139568746123.6364 entropy=4.1092 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0866 front_blocked=0
[Eval 1300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-415190.9 mean_steps=13.4
[Episode 1310] reward=-56249577.3 actor_loss=0.1307 critic_loss=126212412757.3333 entropy=4.1068 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0827 front_blocked=0
[Episode 1320] reward=-46588350.0 actor_loss=0.1103 critic_loss=121942755620.5714 entropy=4.1086 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0742 front_blocked=0
[Eval 1320] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-490953.0 mean_steps=13.7
[Episode 1330] reward=-58224670.2 actor_loss=0.0970 critic_loss=132830130995.2000 entropy=4.1077 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0859 front_blocked=0
[Episode 1340] reward=-53766459.2 actor_loss=0.0819 critic_loss=128414076928.0000 entropy=4.1020 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0755 front_blocked=0
[Eval 1340] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-480046.1 mean_steps=13.6
[Episode 1350] reward=-52391000.5 actor_loss=0.1054 critic_loss=125308594468.5714 entropy=4.0981 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0723 front_blocked=0
[Episode 1360] reward=-57353202.5 actor_loss=0.1153 critic_loss=129771519590.4000 entropy=4.1010 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0938 front_blocked=0
[Eval 1360] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-486650.4 mean_steps=13.7
[Episode 1370] reward=-55964340.5 actor_loss=0.1167 critic_loss=127115536676.5714 entropy=4.0970 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Episode 1380] reward=-43784300.6 actor_loss=0.1052 critic_loss=121629644800.0000 entropy=4.0990 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Eval 1380] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-412487.8 mean_steps=13.3
[Episode 1390] reward=-39534968.8 actor_loss=0.1084 critic_loss=117710890715.4286 entropy=4.0981 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0749 front_blocked=0
[Episode 1400] reward=-52423477.1 actor_loss=0.0867 critic_loss=125495062155.6364 entropy=4.0953 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0768 front_blocked=0
[Eval 1400] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-461076.4 mean_steps=12.8
[Episode 1410] reward=-57034422.6 actor_loss=0.1074 critic_loss=131268798464.0000 entropy=4.0890 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0892 front_blocked=0
[Episode 1420] reward=-49273618.3 actor_loss=0.1087 critic_loss=126913124761.6000 entropy=4.0928 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0749 front_blocked=0
[Eval 1420] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-572747.5 mean_steps=11.7
[Episode 1430] reward=-49459201.8 actor_loss=0.1176 critic_loss=126763048413.8667 entropy=4.0915 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Episode 1440] reward=-56849673.3 actor_loss=0.1288 critic_loss=128745900962.9091 entropy=4.0904 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Eval 1440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-472041.7 mean_steps=12.9
[Episode 1450] reward=-44853858.8 actor_loss=0.1125 critic_loss=123278095155.2000 entropy=4.0898 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Episode 1460] reward=-57872423.8 actor_loss=0.0892 critic_loss=128435092333.7143 entropy=4.0920 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0827 front_blocked=0
[Eval 1460] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-592822.4 mean_steps=13.1
[Episode 1470] reward=-51200247.8 actor_loss=0.0915 critic_loss=126297299101.5385 entropy=4.0894 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0749 front_blocked=0
[Episode 1480] reward=-53156910.9 actor_loss=0.0804 critic_loss=125998077021.0909 entropy=4.0951 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0742 front_blocked=0
[Eval 1480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-529609.2 mean_steps=13.7
[Episode 1490] reward=-44813064.3 actor_loss=0.1042 critic_loss=119272681787.0769 entropy=4.0935 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0716 front_blocked=0
[Episode 1500] reward=-54752637.7 actor_loss=0.1001 critic_loss=130851335606.8571 entropy=4.0905 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0768 front_blocked=0
[Eval 1500] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537586.4 mean_steps=12.9
[Episode 1510] reward=-50387047.5 actor_loss=0.1282 critic_loss=121101787136.0000 entropy=4.0900 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Episode 1520] reward=-55154245.2 actor_loss=0.1400 critic_loss=120094140708.5714 entropy=4.0910 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0879 front_blocked=0
[Eval 1520] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-281741.0 mean_steps=15.8
[Episode 1530] reward=-54756223.7 actor_loss=0.0929 critic_loss=128339282944.0000 entropy=4.0935 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0755 front_blocked=0
[Episode 1540] reward=-61427171.4 actor_loss=0.0832 critic_loss=126731833799.1111 entropy=4.0921 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0859 front_blocked=0
[Eval 1540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-580728.5 mean_steps=12.8
[Episode 1550] reward=-52677511.0 actor_loss=0.1002 critic_loss=124199227392.0000 entropy=4.0921 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Episode 1560] reward=-57147559.7 actor_loss=0.0805 critic_loss=131810189312.0000 entropy=4.0947 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Eval 1560] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-552809.1 mean_steps=13.4
[Episode 1570] reward=-51291977.1 actor_loss=0.1092 critic_loss=127994741646.2222 entropy=4.0969 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0781 front_blocked=0
[Episode 1580] reward=-47798057.8 actor_loss=0.0924 critic_loss=124209405952.0000 entropy=4.1003 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0801 front_blocked=0
[Eval 1580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-439063.7 mean_steps=14.5
[Episode 1590] reward=-53734425.8 actor_loss=0.1026 critic_loss=131097486336.0000 entropy=4.1017 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0710 front_blocked=0
[Episode 1600] reward=-66091341.9 actor_loss=0.1328 critic_loss=132726623721.7391 entropy=4.0993 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0924 front_blocked=0
[Eval 1600] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-376795.1 mean_steps=14.7
[Episode 1610] reward=-42158553.4 actor_loss=0.0885 critic_loss=120024009113.6000 entropy=4.1031 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0775 front_blocked=0
[Episode 1620] reward=-56442843.7 actor_loss=0.1504 critic_loss=127535428949.3333 entropy=4.0988 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Eval 1620] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-416111.5 mean_steps=14.8
[Episode 1630] reward=-58618750.7 actor_loss=0.1147 critic_loss=135212074780.4444 entropy=4.0974 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0885 front_blocked=0
[Episode 1640] reward=-48818712.3 actor_loss=0.0970 critic_loss=122249580544.0000 entropy=4.0946 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0729 front_blocked=0
[Eval 1640] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-320370.4 mean_steps=15.3
[Episode 1650] reward=-49285918.5 actor_loss=0.1211 critic_loss=125098880708.9231 entropy=4.0953 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0775 front_blocked=0
[Episode 1660] reward=-52142353.4 actor_loss=0.0963 critic_loss=124823651669.3333 entropy=4.0973 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Eval 1660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-615760.6 mean_steps=13.1
[Episode 1670] reward=-52633767.1 actor_loss=0.1191 critic_loss=130164254161.4545 entropy=4.0985 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Episode 1680] reward=-50761773.6 actor_loss=0.1124 critic_loss=124321821491.2000 entropy=4.0963 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Eval 1680] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-461385.5 mean_steps=13.8
[Episode 1690] reward=-57993134.0 actor_loss=0.1118 critic_loss=130089709352.4211 entropy=4.0994 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Episode 1700] reward=-69348765.5 actor_loss=0.1391 critic_loss=135213960305.7778 entropy=4.0994 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0931 front_blocked=0
[Eval 1700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426188.2 mean_steps=14.5
[Episode 1710] reward=-50832376.7 actor_loss=0.1025 critic_loss=124520627677.8667 entropy=4.1008 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0762 front_blocked=0
[Episode 1720] reward=-47274226.2 actor_loss=0.0949 critic_loss=122338630041.6000 entropy=4.1029 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0801 front_blocked=0
[Eval 1720] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-544414.4 mean_steps=12.9
[Episode 1730] reward=-58244187.7 actor_loss=0.1198 critic_loss=129378529280.0000 entropy=4.1042 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0794 front_blocked=0
[Episode 1740] reward=-49406326.7 actor_loss=0.1058 critic_loss=119043473408.0000 entropy=4.1019 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0775 front_blocked=0
[Eval 1740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-461690.9 mean_steps=14.6
[Episode 1750] reward=-62428496.4 actor_loss=0.1057 critic_loss=133245565610.6667 entropy=4.1065 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0866 front_blocked=0
[Episode 1760] reward=-59893094.5 actor_loss=0.1020 critic_loss=129858134698.6667 entropy=4.1047 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0866 front_blocked=0
[Eval 1760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-499593.6 mean_steps=13.1
[Episode 1770] reward=-45616551.0 actor_loss=0.1193 critic_loss=125084915029.3333 entropy=4.1025 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0755 front_blocked=0
[Episode 1780] reward=-57307157.5 actor_loss=0.1114 critic_loss=129046518169.6000 entropy=4.1015 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Eval 1780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569808.8 mean_steps=12.8
[Episode 1790] reward=-42262091.5 actor_loss=0.1061 critic_loss=121606750208.0000 entropy=4.1003 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0827 front_blocked=0
[Episode 1800] reward=-53635824.7 actor_loss=0.0977 critic_loss=126425826304.0000 entropy=4.1010 approx_kl=0.0074 kl_stop=1 intervention_rate=0.0762 front_blocked=0
[Eval 1800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-517935.7 mean_steps=13.3
[Episode 1810] reward=-53737733.1 actor_loss=0.0934 critic_loss=127216334438.4000 entropy=4.1022 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0755 front_blocked=0
[Episode 1820] reward=-46955877.5 actor_loss=0.1140 critic_loss=122347761423.0588 entropy=4.1058 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0762 front_blocked=0
[Eval 1820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-464480.1 mean_steps=13.9
[Episode 1830] reward=-66885765.3 actor_loss=0.1215 critic_loss=133838462345.8462 entropy=4.1052 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0924 front_blocked=0
[Episode 1840] reward=-43894114.0 actor_loss=0.0842 critic_loss=118625935360.0000 entropy=4.1013 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0749 front_blocked=0
[Eval 1840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-484269.9 mean_steps=13.8
[Episode 1850] reward=-43337931.3 actor_loss=0.0958 critic_loss=122563736669.0909 entropy=4.1052 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0671 front_blocked=0
[Episode 1860] reward=-51426133.3 actor_loss=0.1048 critic_loss=127713593753.6000 entropy=4.1080 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Eval 1860] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-342477.1 mean_steps=15.6
[Episode 1870] reward=-64604007.5 actor_loss=0.1117 critic_loss=136170590208.0000 entropy=4.1073 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Episode 1880] reward=-50715001.3 actor_loss=0.0999 critic_loss=126593768925.8667 entropy=4.1079 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Eval 1880] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-332131.6 mean_steps=15.3
[Episode 1890] reward=-48145432.1 actor_loss=0.1172 critic_loss=122170974208.0000 entropy=4.1113 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Episode 1900] reward=-56335006.4 actor_loss=0.1026 critic_loss=134843331730.2857 entropy=4.1182 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0794 front_blocked=0
[Eval 1900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-552510.3 mean_steps=12.1
[Episode 1910] reward=-54244618.3 actor_loss=0.1115 critic_loss=130538257612.8000 entropy=4.1196 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0794 front_blocked=0
[Episode 1920] reward=-62376560.5 actor_loss=0.1144 critic_loss=133412973226.6667 entropy=4.1110 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0938 front_blocked=0
[Eval 1920] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-565676.8 mean_steps=12.8
[Episode 1930] reward=-52171312.4 actor_loss=0.1141 critic_loss=122638569016.8889 entropy=4.1099 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Episode 1940] reward=-44648368.4 actor_loss=0.1185 critic_loss=120700829013.3333 entropy=4.1117 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0755 front_blocked=0
[Eval 1940] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-593978.5 mean_steps=13.1
[Episode 1950] reward=-53825960.4 actor_loss=0.1081 critic_loss=123371027849.8462 entropy=4.1146 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0794 front_blocked=0
[Episode 1960] reward=-47897343.8 actor_loss=0.0895 critic_loss=123502098724.5714 entropy=4.1146 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0716 front_blocked=0
[Eval 1960] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-275547.2 mean_steps=16.7
[Episode 1970] reward=-60110745.0 actor_loss=0.1073 critic_loss=131001267541.3333 entropy=4.1119 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Episode 1980] reward=-60149643.5 actor_loss=0.1597 critic_loss=132194290346.6667 entropy=4.1127 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0944 front_blocked=0
[Eval 1980] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-500222.6 mean_steps=12.4
[Episode 1990] reward=-58052699.0 actor_loss=0.0968 critic_loss=127111420691.6923 entropy=4.1097 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Episode 2000] reward=-50553780.2 actor_loss=0.1176 critic_loss=122736410624.0000 entropy=4.1136 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0775 front_blocked=0
[Eval 2000] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-597878.6 mean_steps=12.4
[Episode 2010] reward=-61097210.8 actor_loss=0.1148 critic_loss=135505344039.3846 entropy=4.1122 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0879 front_blocked=0
[Episode 2020] reward=-66019344.0 actor_loss=0.1363 critic_loss=134195485809.7778 entropy=4.1080 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Eval 2020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-530260.7 mean_steps=13.4
[Episode 2030] reward=-47997705.2 actor_loss=0.1214 critic_loss=120511323648.0000 entropy=4.1073 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0801 front_blocked=0
[Episode 2040] reward=-58186227.1 actor_loss=0.1085 critic_loss=128766127104.0000 entropy=4.1055 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0801 front_blocked=0
[Eval 2040] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515825.2 mean_steps=13.1
[Episode 2050] reward=-53251713.5 actor_loss=0.1006 critic_loss=127986003595.6364 entropy=4.1068 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0729 front_blocked=0
[Episode 2060] reward=-47793388.7 actor_loss=0.1032 critic_loss=119969803342.7692 entropy=4.1048 approx_kl=0.0023 kl_stop=1 intervention_rate=0.0749 front_blocked=0
[Eval 2060] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-457830.4 mean_steps=14.7
[Episode 2070] reward=-58094886.3 actor_loss=0.1032 critic_loss=131631124935.1111 entropy=4.1047 approx_kl=0.0013 kl_stop=1 intervention_rate=0.0872 front_blocked=0
[Episode 2080] reward=-62022637.7 actor_loss=0.1248 critic_loss=132202976256.0000 entropy=4.1056 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Eval 2080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566687.5 mean_steps=12.9
[Episode 2090] reward=-59677451.5 actor_loss=0.1314 critic_loss=133043234406.4000 entropy=4.1061 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0911 front_blocked=0
[Episode 2100] reward=-52736628.4 actor_loss=0.1094 critic_loss=126730894615.2727 entropy=4.1098 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Eval 2100] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-380932.3 mean_steps=14.9
[Episode 2110] reward=-45438081.8 actor_loss=0.0894 critic_loss=120516078592.0000 entropy=4.1109 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0697 front_blocked=0
[Episode 2120] reward=-59053081.6 actor_loss=0.1189 critic_loss=132903421659.4286 entropy=4.1118 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0833 front_blocked=0
[Eval 2120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-533755.2 mean_steps=13.7
[Episode 2130] reward=-58984183.1 actor_loss=0.1099 critic_loss=133674172416.0000 entropy=4.1133 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0866 front_blocked=0
[Episode 2140] reward=-53092953.2 actor_loss=0.1215 critic_loss=127028244480.0000 entropy=4.1135 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Eval 2140] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-341878.2 mean_steps=14.6
[Episode 2150] reward=-55860804.8 actor_loss=0.1142 critic_loss=129084728115.2000 entropy=4.1116 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Episode 2160] reward=-57811579.5 actor_loss=0.1151 critic_loss=129938644992.0000 entropy=4.1090 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0742 front_blocked=0
[Eval 2160] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-451164.6 mean_steps=13.0
[Episode 2170] reward=-57727112.0 actor_loss=0.1171 critic_loss=131919773696.0000 entropy=4.1068 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Episode 2180] reward=-57253067.7 actor_loss=0.1102 critic_loss=129191145256.4211 entropy=4.1065 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0872 front_blocked=0
[Eval 2180] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-435730.6 mean_steps=14.5
[Episode 2190] reward=-63911544.8 actor_loss=0.1413 critic_loss=132139391763.6923 entropy=4.1075 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Episode 2200] reward=-50652538.6 actor_loss=0.0843 critic_loss=122618056021.3333 entropy=4.1092 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Eval 2200] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539983.0 mean_steps=12.6
[Episode 2210] reward=-55679439.0 actor_loss=0.1218 critic_loss=125372477440.0000 entropy=4.1099 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Episode 2220] reward=-56765586.5 actor_loss=0.0876 critic_loss=124803578733.7143 entropy=4.1020 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Eval 2220] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-330493.1 mean_steps=15.3
[Episode 2230] reward=-51332758.3 actor_loss=0.0723 critic_loss=126611473203.2000 entropy=4.1068 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0664 front_blocked=0
[Episode 2240] reward=-51370593.8 actor_loss=0.0928 critic_loss=122411043108.5714 entropy=4.1099 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0762 front_blocked=0
[Eval 2240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-521038.9 mean_steps=13.3
[Episode 2250] reward=-51404424.3 actor_loss=0.1057 critic_loss=124272687662.5455 entropy=4.1107 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0710 front_blocked=0
[Episode 2260] reward=-54720523.9 actor_loss=0.1066 critic_loss=129729159168.0000 entropy=4.1113 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0775 front_blocked=0
[Eval 2260] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470308.9 mean_steps=13.2
[Episode 2270] reward=-53686016.7 actor_loss=0.1218 critic_loss=121945927406.9333 entropy=4.1139 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0866 front_blocked=0
[Episode 2280] reward=-46316850.1 actor_loss=0.1358 critic_loss=122088936541.0909 entropy=4.1155 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Eval 2280] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-459056.0 mean_steps=13.7
[Episode 2290] reward=-49368981.9 actor_loss=0.1082 critic_loss=125042404556.8000 entropy=4.1135 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Episode 2300] reward=-49752628.5 actor_loss=0.1209 critic_loss=126771636906.6667 entropy=4.1132 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0768 front_blocked=0
[Eval 2300] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-503648.9 mean_steps=14.2
[Episode 2310] reward=-56997696.0 actor_loss=0.0703 critic_loss=124612810342.4000 entropy=4.1175 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0833 front_blocked=0
[Episode 2320] reward=-55075406.4 actor_loss=0.1158 critic_loss=125157334220.8000 entropy=4.1165 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Eval 2320] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-348482.1 mean_steps=15.3
[Episode 2330] reward=-50935918.1 actor_loss=0.0962 critic_loss=125790231893.3333 entropy=4.1142 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0801 front_blocked=0
[Episode 2340] reward=-53221018.4 actor_loss=0.1075 critic_loss=126279459779.7647 entropy=4.1162 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Eval 2340] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-398128.2 mean_steps=14.1
[Episode 2350] reward=-51377278.9 actor_loss=0.0934 critic_loss=126458614784.0000 entropy=4.1166 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0755 front_blocked=0
[Episode 2360] reward=-49366122.8 actor_loss=0.1256 critic_loss=128041202119.1111 entropy=4.1138 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Eval 2360] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-569903.4 mean_steps=12.6
[Episode 2370] reward=-55374919.8 actor_loss=0.1183 critic_loss=127954675302.4000 entropy=4.1139 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Episode 2380] reward=-49305087.8 actor_loss=0.1152 critic_loss=124100232988.4444 entropy=4.1047 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0794 front_blocked=0
[Eval 2380] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-391679.1 mean_steps=14.0
[Episode 2390] reward=-46177831.8 actor_loss=0.0731 critic_loss=121602945479.1111 entropy=4.1021 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0703 front_blocked=0
[Episode 2400] reward=-50684262.7 actor_loss=0.0964 critic_loss=126569316352.0000 entropy=4.1010 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0827 front_blocked=0
[Eval 2400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-457206.4 mean_steps=14.4
[Episode 2410] reward=-61728152.4 actor_loss=0.1280 critic_loss=126377949184.0000 entropy=4.0962 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0951 front_blocked=0
[Episode 2420] reward=-62158287.1 actor_loss=0.1095 critic_loss=129398481806.2222 entropy=4.0977 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Eval 2420] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-392028.6 mean_steps=14.6
[Episode 2430] reward=-50496506.2 actor_loss=0.1043 critic_loss=128043382338.7826 entropy=4.0923 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0742 front_blocked=0
[Episode 2440] reward=-60174604.8 actor_loss=0.1112 critic_loss=133309836194.9091 entropy=4.0850 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0885 front_blocked=0
[Eval 2440] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-511325.8 mean_steps=12.2
[Episode 2450] reward=-47899479.8 actor_loss=0.0966 critic_loss=126186231621.8182 entropy=4.0841 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Episode 2460] reward=-63350557.3 actor_loss=0.1397 critic_loss=135464023220.7059 entropy=4.0862 approx_kl=0.0043 kl_stop=1 intervention_rate=0.1009 front_blocked=0
[Eval 2460] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-603709.1 mean_steps=12.2
[Episode 2470] reward=-54244339.2 actor_loss=0.1354 critic_loss=130248319561.1429 entropy=4.0855 approx_kl=0.0015 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Episode 2480] reward=-46549441.4 actor_loss=0.1110 critic_loss=123797443072.0000 entropy=4.0855 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0723 front_blocked=0
[Eval 2480] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-607315.8 mean_steps=12.9
[Episode 2490] reward=-42477562.6 actor_loss=0.1035 critic_loss=119658737078.8571 entropy=4.0832 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0710 front_blocked=0
[Episode 2500] reward=-58063911.8 actor_loss=0.1246 critic_loss=134092347030.5882 entropy=4.0846 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0879 front_blocked=0
[Eval 2500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470013.4 mean_steps=13.6
[Episode 2510] reward=-51297122.7 actor_loss=0.1394 critic_loss=125850200536.6154 entropy=4.0826 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0827 front_blocked=0
[Episode 2520] reward=-41491802.6 actor_loss=0.0833 critic_loss=122653429350.4000 entropy=4.0804 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0749 front_blocked=0
[Eval 2520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-468390.7 mean_steps=12.3
[Episode 2530] reward=-52642270.7 actor_loss=0.0860 critic_loss=124533534993.0667 entropy=4.0824 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Episode 2540] reward=-56702521.7 actor_loss=0.0984 critic_loss=132166243508.7059 entropy=4.0842 approx_kl=0.0060 kl_stop=1 intervention_rate=0.0781 front_blocked=0
[Eval 2540] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-591391.3 mean_steps=12.6
[Episode 2550] reward=-51124481.5 actor_loss=0.1333 critic_loss=122110087753.1429 entropy=4.0855 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0833 front_blocked=0
[Episode 2560] reward=-62973923.9 actor_loss=0.1059 critic_loss=131915593255.3846 entropy=4.0832 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0872 front_blocked=0
[Eval 2560] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-337882.8 mean_steps=16.1
[Episode 2570] reward=-58741160.5 actor_loss=0.1160 critic_loss=130396174493.5385 entropy=4.0803 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0892 front_blocked=0
[Episode 2580] reward=-55047614.1 actor_loss=0.1023 critic_loss=126880549156.5714 entropy=4.0836 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0775 front_blocked=0
[Eval 2580] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536592.5 mean_steps=13.4
[Episode 2590] reward=-60623708.3 actor_loss=0.1376 critic_loss=130235207680.0000 entropy=4.0851 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0892 front_blocked=0
[Episode 2600] reward=-69100952.2 actor_loss=0.1350 critic_loss=139237128484.5714 entropy=4.0866 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0924 front_blocked=0
[Eval 2600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-515092.6 mean_steps=12.4
[Episode 2610] reward=-50469892.3 actor_loss=0.1137 critic_loss=127109105436.4444 entropy=4.0826 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0885 front_blocked=0
[Episode 2620] reward=-58979046.4 actor_loss=0.1247 critic_loss=131922114787.5556 entropy=4.0810 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0879 front_blocked=0
[Eval 2620] success_rate=0.600 qp_infeasible_rate=0.400 mean_return=-321290.4 mean_steps=15.7
[Episode 2630] reward=-62590392.8 actor_loss=0.1064 critic_loss=132850024448.0000 entropy=4.0807 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0879 front_blocked=0
[Episode 2640] reward=-49080008.5 actor_loss=0.1113 critic_loss=124671257506.9091 entropy=4.0745 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0859 front_blocked=0
[Eval 2640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-388597.7 mean_steps=13.8
[Episode 2650] reward=-48831408.8 actor_loss=0.1168 critic_loss=124822804118.5882 entropy=4.0753 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0794 front_blocked=0
[Episode 2660] reward=-58420271.0 actor_loss=0.1294 critic_loss=126694011252.3636 entropy=4.0714 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0892 front_blocked=0
[Eval 2660] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-424711.3 mean_steps=13.2
[Episode 2670] reward=-56192296.9 actor_loss=0.1630 critic_loss=127642537984.0000 entropy=4.0679 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Episode 2680] reward=-55336124.6 actor_loss=0.0978 critic_loss=131508264960.0000 entropy=4.0645 approx_kl=0.0077 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Eval 2680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-614343.4 mean_steps=11.6
[Episode 2690] reward=-52514315.2 actor_loss=0.1056 critic_loss=128621187794.8235 entropy=4.0616 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Episode 2700] reward=-53414912.9 actor_loss=0.0952 critic_loss=123294871096.8889 entropy=4.0603 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Eval 2700] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475439.4 mean_steps=13.6
[Episode 2710] reward=-54291862.3 actor_loss=0.1164 critic_loss=129086169998.2222 entropy=4.0585 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Episode 2720] reward=-51276674.4 actor_loss=0.1043 critic_loss=122240316757.3333 entropy=4.0549 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Eval 2720] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-463247.1 mean_steps=13.7
[Episode 2730] reward=-49445391.2 actor_loss=0.1015 critic_loss=119637042907.4286 entropy=4.0583 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0833 front_blocked=0
[Episode 2740] reward=-44265241.4 actor_loss=0.1197 critic_loss=122682785109.3333 entropy=4.0606 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0859 front_blocked=0
[Eval 2740] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470697.7 mean_steps=12.9
[Episode 2750] reward=-55176621.7 actor_loss=0.1111 critic_loss=127228205056.0000 entropy=4.0589 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Episode 2760] reward=-61101428.4 actor_loss=0.1324 critic_loss=129705571421.0909 entropy=4.0573 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Eval 2760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-479056.7 mean_steps=13.8
[Episode 2770] reward=-63531156.6 actor_loss=0.1244 critic_loss=130687472128.0000 entropy=4.0564 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0892 front_blocked=0
[Episode 2780] reward=-49962268.7 actor_loss=0.0818 critic_loss=124679709582.2222 entropy=4.0569 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Eval 2780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-409118.5 mean_steps=14.3
[Episode 2790] reward=-53447691.6 actor_loss=0.1235 critic_loss=124535080960.0000 entropy=4.0562 approx_kl=0.0017 kl_stop=1 intervention_rate=0.0827 front_blocked=0
[Episode 2800] reward=-53002832.3 actor_loss=0.1265 critic_loss=127605224541.0909 entropy=4.0578 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Eval 2800] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-469097.4 mean_steps=13.8
[Episode 2810] reward=-51024793.5 actor_loss=0.1203 critic_loss=121881255305.8462 entropy=4.0602 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Episode 2820] reward=-64001429.7 actor_loss=0.1129 critic_loss=133879192780.8000 entropy=4.0628 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0911 front_blocked=0
[Eval 2820] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-434531.8 mean_steps=14.3
[Episode 2830] reward=-49030859.7 actor_loss=0.1154 critic_loss=121297442133.3333 entropy=4.0656 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Episode 2840] reward=-45845435.0 actor_loss=0.1133 critic_loss=123441187498.6667 entropy=4.0648 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0729 front_blocked=0
[Eval 2840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-575302.3 mean_steps=12.0
[Episode 2850] reward=-58654868.5 actor_loss=0.1172 critic_loss=132704225280.0000 entropy=4.0658 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Episode 2860] reward=-39712677.0 actor_loss=0.1223 critic_loss=120213463950.2222 entropy=4.0663 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0664 front_blocked=0
[Eval 2860] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-314546.7 mean_steps=14.1
[Episode 2870] reward=-61186092.2 actor_loss=0.1304 critic_loss=130846037723.4286 entropy=4.0657 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0938 front_blocked=0
[Episode 2880] reward=-56658955.8 actor_loss=0.0905 critic_loss=127573489664.0000 entropy=4.0647 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Eval 2880] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-668750.6 mean_steps=11.2
[Episode 2890] reward=-70969346.6 actor_loss=0.1282 critic_loss=136946633728.0000 entropy=4.0665 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0898 front_blocked=0
[Episode 2900] reward=-57748671.5 actor_loss=0.1515 critic_loss=128467990341.8182 entropy=4.0650 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Eval 2900] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-433234.9 mean_steps=14.2
[Episode 2910] reward=-71292570.5 actor_loss=0.1022 critic_loss=140893297427.6923 entropy=4.0656 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0924 front_blocked=0
[Episode 2920] reward=-59874503.9 actor_loss=0.1006 critic_loss=128139698176.0000 entropy=4.0656 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0859 front_blocked=0
[Eval 2920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-447389.2 mean_steps=12.8
[Episode 2930] reward=-53389316.8 actor_loss=0.0973 critic_loss=122899446637.7143 entropy=4.0644 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Episode 2940] reward=-62619676.2 actor_loss=0.0970 critic_loss=132427279798.8571 entropy=4.0628 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0892 front_blocked=0
[Eval 2940] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-644034.4 mean_steps=11.5
[Episode 2950] reward=-62968893.0 actor_loss=0.1165 critic_loss=129391875218.2857 entropy=4.0604 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0951 front_blocked=0
[Episode 2960] reward=-54718895.4 actor_loss=0.1032 critic_loss=130222655078.4000 entropy=4.0621 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Eval 2960] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-478896.2 mean_steps=12.9
[Episode 2970] reward=-55523305.2 actor_loss=0.0955 critic_loss=125668175872.0000 entropy=4.0655 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Episode 2980] reward=-65585493.8 actor_loss=0.1383 critic_loss=136272778035.2000 entropy=4.0673 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0885 front_blocked=0
[Eval 2980] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-584466.2 mean_steps=11.2
[Episode 2990] reward=-60379226.0 actor_loss=0.1551 critic_loss=128249754055.1111 entropy=4.0693 approx_kl=0.0072 kl_stop=1 intervention_rate=0.0885 front_blocked=0
[Episode 3000] reward=-59128603.8 actor_loss=0.1307 critic_loss=130451791052.8000 entropy=4.0718 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Eval 3000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-406366.7 mean_steps=14.0
[Episode 3010] reward=-62566276.5 actor_loss=0.1213 critic_loss=134647378602.6667 entropy=4.0696 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0879 front_blocked=0
[Episode 3020] reward=-44465953.5 actor_loss=0.1215 critic_loss=119538259285.3333 entropy=4.0674 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Eval 3020] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-498912.8 mean_steps=13.1
[Episode 3030] reward=-57292886.4 actor_loss=0.1245 critic_loss=125211068258.4615 entropy=4.0637 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Episode 3040] reward=-55092284.6 actor_loss=0.0983 critic_loss=129864942153.1429 entropy=4.0625 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0833 front_blocked=0
[Eval 3040] success_rate=0.750 qp_infeasible_rate=0.250 mean_return=-178721.7 mean_steps=17.4
[Episode 3050] reward=-48381885.1 actor_loss=0.0968 critic_loss=124105527055.0588 entropy=4.0646 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0833 front_blocked=0
[Episode 3060] reward=-61586551.2 actor_loss=0.1272 critic_loss=133182797141.3333 entropy=4.0634 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0879 front_blocked=0
[Eval 3060] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-621334.6 mean_steps=12.4
[Episode 3070] reward=-59158842.7 actor_loss=0.1346 critic_loss=126790092613.8182 entropy=4.0603 approx_kl=0.0018 kl_stop=1 intervention_rate=0.0879 front_blocked=0
[Episode 3080] reward=-50946734.0 actor_loss=0.1188 critic_loss=126691521331.2000 entropy=4.0622 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Eval 3080] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-534921.4 mean_steps=12.3
[Episode 3090] reward=-57313348.1 actor_loss=0.0920 critic_loss=127067557888.0000 entropy=4.0643 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0885 front_blocked=0
[Episode 3100] reward=-64080739.0 actor_loss=0.1277 critic_loss=132477153553.0667 entropy=4.0652 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0964 front_blocked=0
[Eval 3100] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-448019.7 mean_steps=13.4
[Episode 3110] reward=-50306498.4 actor_loss=0.0963 critic_loss=126217647835.4286 entropy=4.0675 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0697 front_blocked=0
[Episode 3120] reward=-63354573.2 actor_loss=0.1255 critic_loss=133842495849.4118 entropy=4.0659 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0905 front_blocked=0
[Eval 3120] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-599781.5 mean_steps=13.4
[Episode 3130] reward=-41416817.3 actor_loss=0.1151 critic_loss=119927657813.3333 entropy=4.0674 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0716 front_blocked=0
[Episode 3140] reward=-49481672.3 actor_loss=0.0865 critic_loss=126298949474.4615 entropy=4.0696 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0742 front_blocked=0
[Eval 3140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-456027.6 mean_steps=13.6
[Episode 3150] reward=-52761216.3 actor_loss=0.0985 critic_loss=128887296585.1429 entropy=4.0716 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Episode 3160] reward=-57686304.4 actor_loss=0.1094 critic_loss=127995772928.0000 entropy=4.0678 approx_kl=0.0068 kl_stop=1 intervention_rate=0.0898 front_blocked=0
[Eval 3160] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-566384.7 mean_steps=11.8
[Episode 3170] reward=-64497031.2 actor_loss=0.1184 critic_loss=134727575365.8182 entropy=4.0705 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0872 front_blocked=0
[Episode 3180] reward=-45500190.8 actor_loss=0.0986 critic_loss=120798445568.0000 entropy=4.0735 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0781 front_blocked=0
[Eval 3180] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-413639.0 mean_steps=14.8
[Episode 3190] reward=-55532356.1 actor_loss=0.0944 critic_loss=126918149734.4000 entropy=4.0769 approx_kl=0.0033 kl_stop=1 intervention_rate=0.0755 front_blocked=0
[Episode 3200] reward=-43858653.7 actor_loss=0.1110 critic_loss=121558801171.6923 entropy=4.0798 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0716 front_blocked=0
[Eval 3200] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-503432.5 mean_steps=13.0
[Episode 3210] reward=-61952316.2 actor_loss=0.0941 critic_loss=133563325203.6923 entropy=4.0778 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Episode 3220] reward=-58320549.6 actor_loss=0.1067 critic_loss=128719973677.1765 entropy=4.0773 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Eval 3220] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-494444.7 mean_steps=13.8
[Episode 3230] reward=-55476625.3 actor_loss=0.1124 critic_loss=126880107081.1429 entropy=4.0757 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0794 front_blocked=0
[Episode 3240] reward=-56595854.5 actor_loss=0.1265 critic_loss=130422747136.0000 entropy=4.0771 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Eval 3240] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-494465.8 mean_steps=11.9
[Episode 3250] reward=-62052697.1 actor_loss=0.1329 critic_loss=132859876352.0000 entropy=4.0838 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0983 front_blocked=0
[Episode 3260] reward=-58922424.0 actor_loss=0.0996 critic_loss=130803346090.6667 entropy=4.0863 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Eval 3260] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-426274.7 mean_steps=14.1
[Episode 3270] reward=-50327102.1 actor_loss=0.1443 critic_loss=124984524800.0000 entropy=4.0886 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Episode 3280] reward=-50640344.5 actor_loss=0.1078 critic_loss=127345749196.8000 entropy=4.0956 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Eval 3280] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-592067.2 mean_steps=11.3
[Episode 3290] reward=-58047871.8 actor_loss=0.1139 critic_loss=129046447718.4000 entropy=4.0994 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0885 front_blocked=0
[Episode 3300] reward=-54261084.1 actor_loss=0.0992 critic_loss=126739846095.2381 entropy=4.0976 approx_kl=0.0081 kl_stop=1 intervention_rate=0.0827 front_blocked=0
[Eval 3300] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-497461.0 mean_steps=12.9
[Episode 3310] reward=-60381436.3 actor_loss=0.1467 critic_loss=132167462638.9333 entropy=4.1016 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0938 front_blocked=0
[Episode 3320] reward=-59310678.5 actor_loss=0.1125 critic_loss=131269499107.5556 entropy=4.1023 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0911 front_blocked=0
[Eval 3320] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-496043.4 mean_steps=12.2
[Episode 3330] reward=-54153807.3 actor_loss=0.1027 critic_loss=129631736627.2000 entropy=4.0984 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0898 front_blocked=0
[Episode 3340] reward=-58581016.9 actor_loss=0.1073 critic_loss=131766954170.1818 entropy=4.0962 approx_kl=0.0014 kl_stop=1 intervention_rate=0.0781 front_blocked=0
[Eval 3340] success_rate=0.150 qp_infeasible_rate=0.850 mean_return=-639098.7 mean_steps=10.8
[Episode 3350] reward=-68653006.5 actor_loss=0.1049 critic_loss=135522223445.3333 entropy=4.0972 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0977 front_blocked=0
[Episode 3360] reward=-47095049.6 actor_loss=0.1156 critic_loss=123119528122.1818 entropy=4.0953 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Eval 3360] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-339725.0 mean_steps=14.4
[Episode 3370] reward=-53770177.6 actor_loss=0.1406 critic_loss=125520244420.9231 entropy=4.0954 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0898 front_blocked=0
[Episode 3380] reward=-54967751.8 actor_loss=0.1347 critic_loss=126871695086.9333 entropy=4.0915 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0885 front_blocked=0
[Eval 3380] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-627855.8 mean_steps=11.6
[Episode 3390] reward=-62447839.9 actor_loss=0.1233 critic_loss=137448796160.0000 entropy=4.0895 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0931 front_blocked=0
[Episode 3400] reward=-60816540.9 actor_loss=0.1264 critic_loss=131520178585.6000 entropy=4.0841 approx_kl=0.0009 kl_stop=1 intervention_rate=0.0879 front_blocked=0
[Eval 3400] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-415967.0 mean_steps=13.8
[Episode 3410] reward=-51384617.0 actor_loss=0.1141 critic_loss=124961852322.9091 entropy=4.0800 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0801 front_blocked=0
[Episode 3420] reward=-63579687.9 actor_loss=0.0867 critic_loss=136104515811.5556 entropy=4.0789 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Eval 3420] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-393618.0 mean_steps=13.7
[Episode 3430] reward=-68454577.1 actor_loss=0.1384 critic_loss=140678348800.0000 entropy=4.0828 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0918 front_blocked=0
[Episode 3440] reward=-53996561.3 actor_loss=0.1193 critic_loss=128570325401.6000 entropy=4.0816 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0781 front_blocked=0
[Eval 3440] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-429385.8 mean_steps=13.2
[Episode 3450] reward=-56923155.2 actor_loss=0.1405 critic_loss=126982661006.2222 entropy=4.0863 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0911 front_blocked=0
[Episode 3460] reward=-48636735.5 actor_loss=0.0867 critic_loss=124535346062.2222 entropy=4.0837 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0651 front_blocked=0
[Eval 3460] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-412919.7 mean_steps=14.2
[Episode 3470] reward=-43141412.0 actor_loss=0.0845 critic_loss=121093015040.0000 entropy=4.0802 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0762 front_blocked=0
[Episode 3480] reward=-49225550.9 actor_loss=0.1429 critic_loss=122518762057.1429 entropy=4.0806 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0762 front_blocked=0
[Eval 3480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-510351.4 mean_steps=13.1
[Episode 3490] reward=-54639304.4 actor_loss=0.1136 critic_loss=127527603106.9091 entropy=4.0832 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Episode 3500] reward=-54527447.9 actor_loss=0.1271 critic_loss=128313088409.6000 entropy=4.0850 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0833 front_blocked=0
[Eval 3500] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-505994.4 mean_steps=13.2
[Episode 3510] reward=-58530233.7 actor_loss=0.1265 critic_loss=128228807111.1111 entropy=4.0887 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Episode 3520] reward=-45606221.9 actor_loss=0.1188 critic_loss=119721579861.3333 entropy=4.0896 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0781 front_blocked=0
[Eval 3520] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-511303.9 mean_steps=12.4
[Episode 3530] reward=-61740074.5 actor_loss=0.1229 critic_loss=129361500160.0000 entropy=4.0880 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0911 front_blocked=0
[Episode 3540] reward=-48716221.5 actor_loss=0.0988 critic_loss=123780369920.0000 entropy=4.0892 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0775 front_blocked=0
[Eval 3540] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-523656.0 mean_steps=13.2
[Episode 3550] reward=-46131204.8 actor_loss=0.1225 critic_loss=123938901643.6364 entropy=4.0891 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Episode 3560] reward=-56993431.7 actor_loss=0.1294 critic_loss=126639005696.0000 entropy=4.0922 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0866 front_blocked=0
[Eval 3560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-376518.2 mean_steps=13.0
[Episode 3570] reward=-42195396.1 actor_loss=0.1038 critic_loss=121279962453.3333 entropy=4.0892 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0710 front_blocked=0
[Episode 3580] reward=-64794617.9 actor_loss=0.1193 critic_loss=136560556590.5455 entropy=4.0887 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0885 front_blocked=0
[Eval 3580] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-623266.3 mean_steps=11.3
[Episode 3590] reward=-56769590.9 actor_loss=0.1075 critic_loss=126413068151.4667 entropy=4.0897 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Episode 3600] reward=-53657228.1 actor_loss=0.1158 critic_loss=123956558961.7778 entropy=4.0912 approx_kl=0.0066 kl_stop=1 intervention_rate=0.0859 front_blocked=0
[Eval 3600] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-452709.7 mean_steps=14.3
[Episode 3610] reward=-50556184.9 actor_loss=0.0840 critic_loss=126229368832.0000 entropy=4.0898 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0710 front_blocked=0
[Episode 3620] reward=-61762970.1 actor_loss=0.1226 critic_loss=133886667980.8000 entropy=4.0878 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0879 front_blocked=0
[Eval 3620] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-304862.4 mean_steps=14.6
[Episode 3630] reward=-61818417.7 actor_loss=0.1197 critic_loss=129066505557.3333 entropy=4.0842 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0924 front_blocked=0
[Episode 3640] reward=-49903130.3 actor_loss=0.1023 critic_loss=123572069171.2000 entropy=4.0833 approx_kl=0.0062 kl_stop=1 intervention_rate=0.0755 front_blocked=0
[Eval 3640] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-399953.6 mean_steps=14.0
[Episode 3650] reward=-49377541.1 actor_loss=0.1132 critic_loss=123472683008.0000 entropy=4.0804 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Episode 3660] reward=-61738569.4 actor_loss=0.0867 critic_loss=135740349440.0000 entropy=4.0779 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0872 front_blocked=0
[Eval 3660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-619556.8 mean_steps=13.1
[Episode 3670] reward=-60014194.5 actor_loss=0.1294 critic_loss=129706174650.1818 entropy=4.0787 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0983 front_blocked=0
[Episode 3680] reward=-51000175.2 actor_loss=0.1227 critic_loss=122319816890.1818 entropy=4.0809 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0892 front_blocked=0
[Eval 3680] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-572763.8 mean_steps=11.3
[Episode 3690] reward=-49246665.8 actor_loss=0.0994 critic_loss=121416609792.0000 entropy=4.0804 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0755 front_blocked=0
[Episode 3700] reward=-47808323.7 actor_loss=0.1256 critic_loss=123752857600.0000 entropy=4.0794 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Eval 3700] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-370578.5 mean_steps=14.0
[Episode 3710] reward=-52716113.4 actor_loss=0.1298 critic_loss=128088209895.6190 entropy=4.0821 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0866 front_blocked=0
[Episode 3720] reward=-47296144.5 actor_loss=0.1401 critic_loss=119924291584.0000 entropy=4.0815 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0781 front_blocked=0
[Eval 3720] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-413420.8 mean_steps=14.2
[Episode 3730] reward=-61640016.6 actor_loss=0.1121 critic_loss=130284368554.6667 entropy=4.0838 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Episode 3740] reward=-54762747.4 actor_loss=0.1238 critic_loss=127020755874.9091 entropy=4.0844 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Eval 3740] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-443534.5 mean_steps=14.0
[Episode 3750] reward=-54067167.0 actor_loss=0.1094 critic_loss=128080893952.0000 entropy=4.0827 approx_kl=0.0064 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Episode 3760] reward=-53170956.5 actor_loss=0.0864 critic_loss=124860686882.1333 entropy=4.0866 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0775 front_blocked=0
[Eval 3760] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-550293.9 mean_steps=13.1
[Episode 3770] reward=-57119861.5 actor_loss=0.1033 critic_loss=127726687118.2222 entropy=4.0853 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0866 front_blocked=0
[Episode 3780] reward=-51956638.6 actor_loss=0.1001 critic_loss=126395114382.2222 entropy=4.0873 approx_kl=0.0035 kl_stop=1 intervention_rate=0.0749 front_blocked=0
[Eval 3780] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-436870.7 mean_steps=14.3
[Episode 3790] reward=-56438839.5 actor_loss=0.0950 critic_loss=124484073629.5385 entropy=4.0877 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0762 front_blocked=0
[Episode 3800] reward=-45245897.7 actor_loss=0.0979 critic_loss=120207989418.6667 entropy=4.0905 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0729 front_blocked=0
[Eval 3800] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-466073.8 mean_steps=12.9
[Episode 3810] reward=-50898428.7 actor_loss=0.0939 critic_loss=123223343104.0000 entropy=4.0929 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0762 front_blocked=0
[Episode 3820] reward=-48377837.2 actor_loss=0.1106 critic_loss=126366860629.3333 entropy=4.0940 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Eval 3820] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-386907.7 mean_steps=14.6
[Episode 3830] reward=-52201610.9 actor_loss=0.1259 critic_loss=122166203733.3333 entropy=4.0923 approx_kl=0.0042 kl_stop=1 intervention_rate=0.0827 front_blocked=0
[Episode 3840] reward=-59909648.2 actor_loss=0.1270 critic_loss=135065853952.0000 entropy=4.0967 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0911 front_blocked=0
[Eval 3840] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-502579.7 mean_steps=13.8
[Episode 3850] reward=-66768092.7 actor_loss=0.1151 critic_loss=138772253354.6667 entropy=4.0948 approx_kl=0.0070 kl_stop=1 intervention_rate=0.0938 front_blocked=0
[Episode 3860] reward=-69492143.2 actor_loss=0.1372 critic_loss=137394538496.0000 entropy=4.0912 approx_kl=0.0025 kl_stop=1 intervention_rate=0.0964 front_blocked=0
[Eval 3860] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-515730.6 mean_steps=13.2
[Episode 3870] reward=-52900268.9 actor_loss=0.1294 critic_loss=128551457587.2000 entropy=4.0886 approx_kl=0.0069 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Episode 3880] reward=-56193590.4 actor_loss=0.0787 critic_loss=129273059485.5385 entropy=4.0927 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0710 front_blocked=0
[Eval 3880] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-475476.2 mean_steps=13.5
[Episode 3890] reward=-54976504.7 actor_loss=0.1126 critic_loss=125068926231.2727 entropy=4.0959 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Episode 3900] reward=-54231420.3 actor_loss=0.1222 critic_loss=123001610240.0000 entropy=4.1012 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Eval 3900] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-467109.2 mean_steps=12.7
[Episode 3910] reward=-64090205.9 actor_loss=0.1069 critic_loss=134132095385.6000 entropy=4.0980 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0892 front_blocked=0
[Episode 3920] reward=-64001925.3 actor_loss=0.1106 critic_loss=137713922048.0000 entropy=4.0981 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0833 front_blocked=0
[Eval 3920] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-592112.4 mean_steps=11.9
[Episode 3930] reward=-53839792.7 actor_loss=0.1011 critic_loss=126873172992.0000 entropy=4.0990 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0736 front_blocked=0
[Episode 3940] reward=-55493097.1 actor_loss=0.0893 critic_loss=124319910297.6000 entropy=4.1017 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0879 front_blocked=0
[Eval 3940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-616637.3 mean_steps=12.3
[Episode 3950] reward=-59290105.7 actor_loss=0.1296 critic_loss=128006591283.2000 entropy=4.1007 approx_kl=0.0054 kl_stop=1 intervention_rate=0.0905 front_blocked=0
[Episode 3960] reward=-62315305.5 actor_loss=0.1301 critic_loss=132157419892.3636 entropy=4.0976 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0892 front_blocked=0
[Eval 3960] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-481372.3 mean_steps=14.3
[Episode 3970] reward=-60863253.6 actor_loss=0.1542 critic_loss=133355641241.6000 entropy=4.0970 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0905 front_blocked=0
[Episode 3980] reward=-53423906.1 actor_loss=0.1025 critic_loss=126696956489.1429 entropy=4.1008 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0775 front_blocked=0
[Eval 3980] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-477759.0 mean_steps=13.6
[Episode 3990] reward=-60493812.1 actor_loss=0.1374 critic_loss=131055615036.2353 entropy=4.1043 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0898 front_blocked=0
[Episode 4000] reward=-62198172.5 actor_loss=0.1397 critic_loss=130449783193.6000 entropy=4.1091 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0898 front_blocked=0
[Eval 4000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-442170.9 mean_steps=14.1
[Episode 4010] reward=-46635805.6 actor_loss=0.0778 critic_loss=121412273766.4000 entropy=4.1091 approx_kl=0.0065 kl_stop=1 intervention_rate=0.0736 front_blocked=0
[Episode 4020] reward=-46727774.7 actor_loss=0.1295 critic_loss=123054434146.4615 entropy=4.1133 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Eval 4020] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-417104.3 mean_steps=14.1
[Episode 4030] reward=-51564818.4 actor_loss=0.0952 critic_loss=124743612825.6000 entropy=4.1185 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0833 front_blocked=0
[Episode 4040] reward=-57605052.0 actor_loss=0.1202 critic_loss=129309951590.4000 entropy=4.1177 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0788 front_blocked=0
[Eval 4040] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-640720.8 mean_steps=11.8
[Episode 4050] reward=-62450945.9 actor_loss=0.0999 critic_loss=134795425382.4000 entropy=4.1176 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0879 front_blocked=0
[Episode 4060] reward=-60749827.8 actor_loss=0.1057 critic_loss=129874211271.1111 entropy=4.1182 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0911 front_blocked=0
[Eval 4060] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-470914.2 mean_steps=12.5
[Episode 4070] reward=-62468218.8 actor_loss=0.1451 critic_loss=134368903168.0000 entropy=4.1167 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0879 front_blocked=0
[Episode 4080] reward=-57136469.7 actor_loss=0.1373 critic_loss=128605208576.0000 entropy=4.1206 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0898 front_blocked=0
[Eval 4080] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-595648.8 mean_steps=12.0
[Episode 4090] reward=-57070303.2 actor_loss=0.0984 critic_loss=128122475008.0000 entropy=4.1225 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0794 front_blocked=0
[Episode 4100] reward=-56426036.1 actor_loss=0.1218 critic_loss=126066364416.0000 entropy=4.1225 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0885 front_blocked=0
[Eval 4100] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-483476.0 mean_steps=12.7
[Episode 4110] reward=-62481124.2 actor_loss=0.1473 critic_loss=130541554346.6667 entropy=4.1223 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0938 front_blocked=0
[Episode 4120] reward=-51987212.4 actor_loss=0.1075 critic_loss=127838326272.0000 entropy=4.1207 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Eval 4120] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-484170.3 mean_steps=14.4
[Episode 4130] reward=-55736606.4 actor_loss=0.1374 critic_loss=124748286361.6000 entropy=4.1192 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0911 front_blocked=0
[Episode 4140] reward=-54830081.9 actor_loss=0.1196 critic_loss=125079334912.0000 entropy=4.1166 approx_kl=0.0026 kl_stop=1 intervention_rate=0.0859 front_blocked=0
[Eval 4140] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-481631.4 mean_steps=13.3
[Episode 4150] reward=-54327248.5 actor_loss=0.1284 critic_loss=130006717001.1429 entropy=4.1198 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0872 front_blocked=0
[Episode 4160] reward=-58382122.2 actor_loss=0.1289 critic_loss=127686924697.6000 entropy=4.1191 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0924 front_blocked=0
[Eval 4160] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-583176.5 mean_steps=12.7
[Episode 4170] reward=-43980276.5 actor_loss=0.0984 critic_loss=122600253849.6000 entropy=4.1232 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0736 front_blocked=0
[Episode 4180] reward=-55935732.4 actor_loss=0.1114 critic_loss=129040779946.6667 entropy=4.1229 approx_kl=0.0058 kl_stop=1 intervention_rate=0.0833 front_blocked=0
[Eval 4180] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-553794.4 mean_steps=12.5
[Episode 4190] reward=-49068589.6 actor_loss=0.1022 critic_loss=122114692096.0000 entropy=4.1197 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0755 front_blocked=0
[Episode 4200] reward=-55833681.5 actor_loss=0.1449 critic_loss=128085652275.2000 entropy=4.1191 approx_kl=0.0057 kl_stop=1 intervention_rate=0.0964 front_blocked=0
[Eval 4200] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-407802.9 mean_steps=13.0
[Episode 4210] reward=-47937741.7 actor_loss=0.0921 critic_loss=125000182784.0000 entropy=4.1203 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0794 front_blocked=0
[Episode 4220] reward=-53686830.7 actor_loss=0.1161 critic_loss=127798213017.6000 entropy=4.1193 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0892 front_blocked=0
[Eval 4220] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-672965.8 mean_steps=10.2
[Episode 4230] reward=-48455058.5 actor_loss=0.0997 critic_loss=125291158186.6667 entropy=4.1194 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0859 front_blocked=0
[Episode 4240] reward=-58283448.1 actor_loss=0.1146 critic_loss=128148138170.1818 entropy=4.1212 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Eval 4240] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-536841.9 mean_steps=13.3
[Episode 4250] reward=-61866714.9 actor_loss=0.1153 critic_loss=138323019962.1818 entropy=4.1214 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Episode 4260] reward=-52453255.8 actor_loss=0.1177 critic_loss=125844010535.3846 entropy=4.1198 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0801 front_blocked=0
[Eval 4260] success_rate=0.200 qp_infeasible_rate=0.800 mean_return=-620980.9 mean_steps=11.3
[Episode 4270] reward=-60588495.2 actor_loss=0.1199 critic_loss=132723465420.8000 entropy=4.1220 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0872 front_blocked=0
[Episode 4280] reward=-56866143.0 actor_loss=0.0976 critic_loss=128239333376.0000 entropy=4.1210 approx_kl=0.0028 kl_stop=1 intervention_rate=0.0898 front_blocked=0
[Eval 4280] success_rate=0.650 qp_infeasible_rate=0.350 mean_return=-284323.2 mean_steps=16.4
[Episode 4290] reward=-50760524.0 actor_loss=0.0845 critic_loss=129335826944.0000 entropy=4.1228 approx_kl=0.0076 kl_stop=1 intervention_rate=0.0742 front_blocked=0
[Episode 4300] reward=-50507507.2 actor_loss=0.1059 critic_loss=126021595955.2000 entropy=4.1196 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Eval 4300] success_rate=0.100 qp_infeasible_rate=0.900 mean_return=-738758.5 mean_steps=10.7
[Episode 4310] reward=-50027494.9 actor_loss=0.1164 critic_loss=121513225122.9091 entropy=4.1193 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Episode 4320] reward=-56969494.9 actor_loss=0.0976 critic_loss=130517081292.8000 entropy=4.1203 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0768 front_blocked=0
[Eval 4320] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-557973.7 mean_steps=11.9
[Episode 4330] reward=-68309630.1 actor_loss=0.1093 critic_loss=139466962944.0000 entropy=4.1234 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0866 front_blocked=0
[Episode 4340] reward=-49975870.9 actor_loss=0.1017 critic_loss=130101296128.0000 entropy=4.1238 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Eval 4340] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-537982.2 mean_steps=12.5
[Episode 4350] reward=-50502357.5 actor_loss=0.0970 critic_loss=127731988707.5556 entropy=4.1234 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Episode 4360] reward=-52319398.1 actor_loss=0.0970 critic_loss=128297577130.6667 entropy=4.1252 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0807 front_blocked=0
[Eval 4360] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-586103.7 mean_steps=11.9
[Episode 4370] reward=-53952800.7 actor_loss=0.1302 critic_loss=125969102438.4000 entropy=4.1264 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0827 front_blocked=0
[Episode 4380] reward=-48915415.2 actor_loss=0.1082 critic_loss=127859472942.5455 entropy=4.1247 approx_kl=0.0088 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Eval 4380] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-520395.0 mean_steps=13.2
[Episode 4390] reward=-57889056.9 actor_loss=0.1188 critic_loss=132108530483.2000 entropy=4.1248 approx_kl=0.0034 kl_stop=1 intervention_rate=0.0866 front_blocked=0
[Episode 4400] reward=-53950330.2 actor_loss=0.1353 critic_loss=126754913621.3333 entropy=4.1266 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0833 front_blocked=0
[Eval 4400] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-429827.9 mean_steps=13.4
[Episode 4410] reward=-52562420.0 actor_loss=0.1147 critic_loss=125672388380.4444 entropy=4.1285 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0801 front_blocked=0
[Episode 4420] reward=-55082154.5 actor_loss=0.1094 critic_loss=127409347118.5455 entropy=4.1315 approx_kl=0.0050 kl_stop=1 intervention_rate=0.0911 front_blocked=0
[Eval 4420] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-559782.3 mean_steps=11.7
[Episode 4430] reward=-41805801.8 actor_loss=0.1077 critic_loss=117293934080.0000 entropy=4.1323 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0684 front_blocked=0
[Episode 4440] reward=-47460415.9 actor_loss=0.0945 critic_loss=122573817344.0000 entropy=4.1276 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0749 front_blocked=0
[Eval 4440] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-565388.7 mean_steps=13.4
[Episode 4450] reward=-54379066.4 actor_loss=0.1219 critic_loss=126693777954.1333 entropy=4.1283 approx_kl=0.0067 kl_stop=1 intervention_rate=0.0872 front_blocked=0
[Episode 4460] reward=-48726354.7 actor_loss=0.0887 critic_loss=121462543701.3333 entropy=4.1284 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0762 front_blocked=0
[Eval 4460] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-427988.6 mean_steps=13.0
[Episode 4470] reward=-47781865.0 actor_loss=0.1214 critic_loss=119730648244.7059 entropy=4.1260 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0794 front_blocked=0
[Episode 4480] reward=-46286975.8 actor_loss=0.1116 critic_loss=122706218496.0000 entropy=4.1236 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Eval 4480] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-469680.3 mean_steps=12.8
[Episode 4490] reward=-56632897.1 actor_loss=0.1440 critic_loss=128248669798.4000 entropy=4.1210 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Episode 4500] reward=-66850647.7 actor_loss=0.1234 critic_loss=137612395178.6667 entropy=4.1175 approx_kl=0.0059 kl_stop=1 intervention_rate=0.0924 front_blocked=0
[Eval 4500] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-439694.9 mean_steps=13.3
[Episode 4510] reward=-62710587.2 actor_loss=0.1530 critic_loss=129730790865.4545 entropy=4.1161 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0970 front_blocked=0
[Episode 4520] reward=-59546641.8 actor_loss=0.1367 critic_loss=128531547136.0000 entropy=4.1161 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0866 front_blocked=0
[Eval 4520] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-301849.0 mean_steps=14.8
[Episode 4530] reward=-49048776.2 actor_loss=0.1056 critic_loss=122078195029.3333 entropy=4.1176 approx_kl=0.0063 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Episode 4540] reward=-59520626.9 actor_loss=0.1366 critic_loss=131066743076.5714 entropy=4.1164 approx_kl=0.0024 kl_stop=1 intervention_rate=0.0938 front_blocked=0
[Eval 4540] success_rate=0.550 qp_infeasible_rate=0.450 mean_return=-336848.2 mean_steps=15.1
[Episode 4550] reward=-55270929.6 actor_loss=0.1034 critic_loss=131865408307.2000 entropy=4.1166 approx_kl=0.0039 kl_stop=1 intervention_rate=0.0794 front_blocked=0
[Episode 4560] reward=-51354376.2 actor_loss=0.1332 critic_loss=125261620019.2000 entropy=4.1172 approx_kl=0.0048 kl_stop=1 intervention_rate=0.0859 front_blocked=0
[Eval 4560] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-407396.8 mean_steps=13.2
[Episode 4570] reward=-49116423.7 actor_loss=0.1233 critic_loss=125675112155.4286 entropy=4.1195 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Episode 4580] reward=-54222554.8 actor_loss=0.1177 critic_loss=124105314304.0000 entropy=4.1210 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0781 front_blocked=0
[Eval 4580] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-422721.3 mean_steps=14.3
[Episode 4590] reward=-57579527.6 actor_loss=0.0937 critic_loss=131095098982.4000 entropy=4.1202 approx_kl=0.0036 kl_stop=1 intervention_rate=0.0859 front_blocked=0
[Episode 4600] reward=-62956738.6 actor_loss=0.1287 critic_loss=131414479494.7368 entropy=4.1168 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0924 front_blocked=0
[Eval 4600] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-563451.9 mean_steps=12.4
[Episode 4610] reward=-62186724.5 actor_loss=0.1076 critic_loss=130454753735.1111 entropy=4.1183 approx_kl=0.0047 kl_stop=1 intervention_rate=0.0892 front_blocked=0
[Episode 4620] reward=-52128679.2 actor_loss=0.1468 critic_loss=126564601400.8889 entropy=4.1185 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0833 front_blocked=0
[Eval 4620] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-536966.3 mean_steps=12.3
[Episode 4630] reward=-60742225.3 actor_loss=0.1119 critic_loss=129329815552.0000 entropy=4.1220 approx_kl=0.0022 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Episode 4640] reward=-51699908.0 actor_loss=0.1052 critic_loss=125680841289.1429 entropy=4.1281 approx_kl=0.0053 kl_stop=1 intervention_rate=0.0794 front_blocked=0
[Eval 4640] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-489000.9 mean_steps=13.9
[Episode 4650] reward=-52415031.0 actor_loss=0.0838 critic_loss=125883487027.2000 entropy=4.1286 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0775 front_blocked=0
[Episode 4660] reward=-52513898.9 actor_loss=0.1186 critic_loss=127984569995.6364 entropy=4.1308 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Eval 4660] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-566030.9 mean_steps=12.4
[Episode 4670] reward=-52672454.0 actor_loss=0.0892 critic_loss=127284112952.8889 entropy=4.1281 approx_kl=0.0071 kl_stop=1 intervention_rate=0.0762 front_blocked=0
[Episode 4680] reward=-58662381.0 actor_loss=0.1132 critic_loss=125859713843.2000 entropy=4.1252 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0840 front_blocked=0
[Eval 4680] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-442074.5 mean_steps=12.6
[Episode 4690] reward=-59984709.9 actor_loss=0.1551 critic_loss=128677030570.6667 entropy=4.1235 approx_kl=0.0021 kl_stop=1 intervention_rate=0.0872 front_blocked=0
[Episode 4700] reward=-46342497.8 actor_loss=0.1024 critic_loss=122800933888.0000 entropy=4.1269 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0781 front_blocked=0
[Eval 4700] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-559751.1 mean_steps=12.3
[Episode 4710] reward=-58747286.9 actor_loss=0.1346 critic_loss=129228212906.6667 entropy=4.1297 approx_kl=0.0080 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Episode 4720] reward=-58052485.8 actor_loss=0.1357 critic_loss=129106477738.6667 entropy=4.1302 approx_kl=0.0037 kl_stop=1 intervention_rate=0.0898 front_blocked=0
[Eval 4720] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-522845.2 mean_steps=13.1
[Episode 4730] reward=-49874915.1 actor_loss=0.1091 critic_loss=125429434504.5333 entropy=4.1291 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0820 front_blocked=0
[Episode 4740] reward=-50821410.4 actor_loss=0.1396 critic_loss=121714745929.1429 entropy=4.1291 approx_kl=0.0055 kl_stop=1 intervention_rate=0.0905 front_blocked=0
[Eval 4740] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-446992.4 mean_steps=13.3
[Episode 4750] reward=-44927593.2 actor_loss=0.1065 critic_loss=121524351431.1111 entropy=4.1327 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0742 front_blocked=0
[Episode 4760] reward=-54394978.1 actor_loss=0.1073 critic_loss=128342116713.4118 entropy=4.1329 approx_kl=0.0032 kl_stop=1 intervention_rate=0.0859 front_blocked=0
[Eval 4760] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-501220.5 mean_steps=13.8
[Episode 4770] reward=-54608501.0 actor_loss=0.1359 critic_loss=130001197348.5714 entropy=4.1324 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Episode 4780] reward=-42714095.3 actor_loss=0.1660 critic_loss=117736694784.0000 entropy=4.1333 approx_kl=0.0041 kl_stop=1 intervention_rate=0.0775 front_blocked=0
[Eval 4780] success_rate=0.300 qp_infeasible_rate=0.700 mean_return=-539528.5 mean_steps=12.4
[Episode 4790] reward=-56486982.7 actor_loss=0.1342 critic_loss=133219061387.6364 entropy=4.1326 approx_kl=0.0079 kl_stop=1 intervention_rate=0.0866 front_blocked=0
[Episode 4800] reward=-51296394.1 actor_loss=0.1115 critic_loss=122256703488.0000 entropy=4.1353 approx_kl=0.0029 kl_stop=1 intervention_rate=0.0859 front_blocked=0
[Eval 4800] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-405908.3 mean_steps=14.4
[Episode 4810] reward=-62310808.5 actor_loss=0.1189 critic_loss=132457926070.8571 entropy=4.1329 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0885 front_blocked=0
[Episode 4820] reward=-53589711.6 actor_loss=0.1163 critic_loss=124826475373.7143 entropy=4.1313 approx_kl=0.0030 kl_stop=1 intervention_rate=0.0892 front_blocked=0
[Eval 4820] success_rate=0.400 qp_infeasible_rate=0.600 mean_return=-470995.4 mean_steps=13.3
[Episode 4830] reward=-59566619.0 actor_loss=0.0992 critic_loss=126435410830.2222 entropy=4.1316 approx_kl=0.0031 kl_stop=1 intervention_rate=0.0846 front_blocked=0
[Episode 4840] reward=-58572659.4 actor_loss=0.1292 critic_loss=127221364893.5385 entropy=4.1353 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0885 front_blocked=0
[Eval 4840] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-544471.4 mean_steps=11.8
[Episode 4850] reward=-50430807.7 actor_loss=0.1066 critic_loss=123517674382.2222 entropy=4.1373 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0775 front_blocked=0
[Episode 4860] reward=-63666668.8 actor_loss=0.1606 critic_loss=135622477414.4000 entropy=4.1398 approx_kl=0.0038 kl_stop=1 intervention_rate=0.0911 front_blocked=0
[Eval 4860] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-596196.5 mean_steps=11.8
[Episode 4870] reward=-51079855.1 actor_loss=0.1156 critic_loss=124802224128.0000 entropy=4.1431 approx_kl=0.0056 kl_stop=1 intervention_rate=0.0885 front_blocked=0
[Episode 4880] reward=-64204411.8 actor_loss=0.1232 critic_loss=129806574478.2222 entropy=4.1451 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0924 front_blocked=0
[Eval 4880] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-394216.2 mean_steps=14.6
[Episode 4890] reward=-56817854.1 actor_loss=0.1263 critic_loss=125392776098.9091 entropy=4.1443 approx_kl=0.0075 kl_stop=1 intervention_rate=0.0866 front_blocked=0
[Episode 4900] reward=-58616323.0 actor_loss=0.1557 critic_loss=130753422950.4000 entropy=4.1474 approx_kl=0.0061 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Eval 4900] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-617015.5 mean_steps=12.0
[Episode 4910] reward=-62532833.0 actor_loss=0.1236 critic_loss=134321085018.3529 entropy=4.1519 approx_kl=0.0051 kl_stop=1 intervention_rate=0.0938 front_blocked=0
[Episode 4920] reward=-47844791.7 actor_loss=0.0908 critic_loss=123204768621.7143 entropy=4.1565 approx_kl=0.0052 kl_stop=1 intervention_rate=0.0729 front_blocked=0
[Eval 4920] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-492551.9 mean_steps=12.9
[Episode 4930] reward=-67788092.0 actor_loss=0.0979 critic_loss=139183494981.8182 entropy=4.1574 approx_kl=0.0044 kl_stop=1 intervention_rate=0.0866 front_blocked=0
[Episode 4940] reward=-61052730.9 actor_loss=0.1473 critic_loss=132327568384.0000 entropy=4.1599 approx_kl=0.0045 kl_stop=1 intervention_rate=0.0885 front_blocked=0
[Eval 4940] success_rate=0.250 qp_infeasible_rate=0.750 mean_return=-528076.2 mean_steps=11.7
[Episode 4950] reward=-58354309.9 actor_loss=0.1239 critic_loss=128313815859.2000 entropy=4.1608 approx_kl=0.0027 kl_stop=1 intervention_rate=0.0814 front_blocked=0
[Episode 4960] reward=-68368265.0 actor_loss=0.1231 critic_loss=138177834635.6364 entropy=4.1636 approx_kl=0.0049 kl_stop=1 intervention_rate=0.0990 front_blocked=0
[Eval 4960] success_rate=0.500 qp_infeasible_rate=0.500 mean_return=-382060.3 mean_steps=14.4
[Episode 4970] reward=-58262408.6 actor_loss=0.1540 critic_loss=128692558848.0000 entropy=4.1642 approx_kl=0.0046 kl_stop=1 intervention_rate=0.0911 front_blocked=0
[Episode 4980] reward=-58422317.9 actor_loss=0.1082 critic_loss=129177792512.0000 entropy=4.1679 approx_kl=0.0040 kl_stop=1 intervention_rate=0.0794 front_blocked=0
[Eval 4980] success_rate=0.350 qp_infeasible_rate=0.650 mean_return=-542308.2 mean_steps=13.1
[Episode 4990] reward=-56035494.2 actor_loss=0.1227 critic_loss=124198971578.1818 entropy=4.1671 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0905 front_blocked=0
[Episode 5000] reward=-51527823.4 actor_loss=0.1018 critic_loss=120994848768.0000 entropy=4.1617 approx_kl=0.0043 kl_stop=1 intervention_rate=0.0853 front_blocked=0
[Eval 5000] success_rate=0.450 qp_infeasible_rate=0.550 mean_return=-387846.9 mean_steps=14.0